summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build.zig43
-rw-r--r--build.zig.zon17
-rw-r--r--src/main.zig163
-rw-r--r--src/percent_encoding.zig593
4 files changed, 816 insertions, 0 deletions
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..c6aaa70
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,43 @@
+const std = @import( "std" );
+
+/// Declares the build graph for the `zjisho` executable.
+///
+/// Steps registered, in order: the default install step for the executable,
+/// `check` (compiles a second executable from the same module without
+/// installing it), `run` (runs the installed executable, forwarding any
+/// arguments given to the build), and `test` (runs the module's unit tests).
+pub fn build(b: *std.Build) void {
+    // One module feeds the installed binary, the check binary and the tests.
+    const root_module = b.createModule(.{
+        .root_source_file = b.path("src/main.zig"),
+        .target = b.standardTargetOptions(.{}),
+        .optimize = b.standardOptimizeOption(.{}),
+    });
+
+    const app = b.addExecutable(.{ .name = "zjisho", .root_module = root_module });
+    b.installArtifact(app);
+
+    // Compile-only artifact: the `check` step depends on building it but it is never installed.
+    const check_app = b.addExecutable(.{ .name = "zjisho", .root_module = root_module });
+    b.step("check", "check compile result").dependOn(&check_app.step);
+
+    // `run` executes the artifact after the install step has completed.
+    const run_app = b.addRunArtifact(app);
+    run_app.step.dependOn(b.getInstallStep());
+    if (b.args) |forwarded| run_app.addArgs(forwarded);
+    b.step("run", "Run the app").dependOn(&run_app.step);
+
+    const unit_tests = b.addTest(.{ .root_module = root_module });
+    b.step("test", "Run unit tests").dependOn(&b.addRunArtifact(unit_tests).step);
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..81a3523
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,17 @@
+// Package manifest for the `zjisho` project.
+.{
+ .name = .zjisho,
+ .version = "1.0.0",
+ .fingerprint = 0xa849130ea7f2b81e, // Changing this has security and trust implications.
+ .minimum_zig_version = "0.14.1",
+ // No dependencies are declared.
+ .dependencies = .{
+ },
+
+ // Files and directories listed for this package; the commented-out
+ // entries below are unused examples.
+ .paths = .{
+ "build.zig",
+ "build.zig.zon",
+ "src",
+ // For example...
+ //"LICENSE",
+ //"README.md",
+ },
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..62d3fa4
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,163 @@
+const z = @import("std");
+const urlencode = @import("percent_encoding.zig");
+
+// Process-wide allocator state, configured with `.thread_safe = true`.
+var gpa = z.heap.GeneralPurposeAllocator( .{ .thread_safe = true } ){};
+// Allocator interface backed by `gpa`; the functions in this file allocate through it.
+const alloc = gpa.allocator();
+
+/// Shape that `main` parses the search response body into via `z.json.parseFromSlice`.
+/// Field names double as the JSON keys that are looked up.
+/// NOTE(review): none of the fields has a default value, so presumably every
+/// one of them must be present in the response for parsing to succeed —
+/// confirm against the API (e.g. entries without a `word`).
+const JishoData = struct {
+ /// One search result.
+ pub const DataEntry = struct {
+ slug: []const u8,
+ /// Written forms with their readings; `formatDef` prints only the first element.
+ japanese: []struct {
+ word: []const u8,
+ reading: []const u8,
+ },
+ /// Meanings; each one carries a list of English definitions.
+ senses: []struct {
+ english_definitions: [][]const u8
+ },
+ };
+
+ meta: struct { status: u32 },
+ data: []DataEntry,
+};
+
+/// Requests the search results for `word` and returns the raw response body.
+///
+/// `word` is percent-encoded before being placed in the query string.
+/// The returned slice is allocated with `alloc`; the caller frees it.
+///
+/// Errors: any failure from encoding, URL formatting/parsing, or the HTTP
+/// exchange is returned after a diagnostic is printed for the request stages;
+/// a response status other than `.ok` yields `error.InvalidResponse`.
+fn requestWord(word: []const u8) ![]const u8 {
+    // Upper bound on how much of the response body is read into memory.
+    const max_body_bytes = 999999;
+
+    var client = z.http.Client{ .allocator = alloc };
+    defer client.deinit();
+
+    const encoded = urlencode.encode_alloc(alloc, word, .{}) catch |e| {
+        z.debug.print("error encoding word {s}\n", .{word});
+        return e;
+    };
+    defer alloc.free(encoded);
+
+    const url = try z.fmt.allocPrint(alloc, "https://jisho.org/api/v1/search/words?keyword={s}", .{encoded});
+    defer alloc.free(url);
+    const uri = try z.Uri.parse(url);
+
+    var buf: [4096]u8 = undefined;
+    var req = try client.open(.GET, uri, .{ .server_header_buffer = &buf });
+    defer req.deinit();
+
+    // Each stage reports its own message so a failure can be attributed correctly.
+    req.send() catch |e| {
+        z.debug.print("error sending request to {s}\n", .{url});
+        return e;
+    };
+    req.finish() catch |e| {
+        z.debug.print("error finishing request to {s}\n", .{url});
+        return e;
+    };
+    req.wait() catch |e| {
+        z.debug.print("error waiting for response from {s}\n", .{url});
+        return e;
+    };
+
+    if (req.response.status != .ok) {
+        z.debug.print("invalid response from {s}: {d}\n", .{ url, @intFromEnum(req.response.status) });
+        return error.InvalidResponse;
+    }
+
+    return req.reader().readAllAlloc(alloc, max_body_bytes);
+}
+
+/// Renders one entry into `buf` as `word(reading) - def/def, def/def, etc...`
+/// and returns the written prefix of `buf`.
+///
+/// `definition_count` and `sense_count` are the highest zero-based indices
+/// that are still printed. When more senses exist than are printed,
+/// ", etc..." is appended. Returns `error.NotJapanese` when the entry has no
+/// `japanese` element; a formatting error is propagated when `buf` is too small.
+fn formatDef(buf: []u8, data: *JishoData.DataEntry, definition_count: u32, sense_count: u32) ![]const u8 {
+    if (data.japanese.len == 0) return error.NotJapanese;
+
+    const headword = data.japanese[0];
+    var used = (try z.fmt.bufPrint(buf, "{s}({s}) - ", .{ headword.word, headword.reading })).len;
+
+    // Saturating so the value is well-defined for an empty list; it is only
+    // consulted inside the loops, where the length is at least 1.
+    const last_sense = data.senses.len -| 1;
+
+    for (data.senses, 0..) |sense, sense_idx| {
+        if (sense_idx > sense_count) {
+            used += (try z.fmt.bufPrint(buf[used..], ", etc...", .{})).len;
+            break;
+        }
+
+        const last_definition = sense.english_definitions.len -| 1;
+        for (sense.english_definitions, 0..) |definition, definition_idx| {
+            if (definition_idx > definition_count) break;
+
+            // A '/' separator follows only when another definition will be printed.
+            const another_follows = definition_idx < definition_count and definition_idx < last_definition;
+            const piece = if (another_follows)
+                try z.fmt.bufPrint(buf[used..], "{s}/", .{definition})
+            else
+                try z.fmt.bufPrint(buf[used..], "{s}", .{definition});
+            used += piece.len;
+        }
+
+        if (sense_idx < sense_count and sense_idx < last_sense) {
+            used += (try z.fmt.bufPrint(buf[used..], ", ", .{})).len;
+        }
+    }
+
+    return buf[0..used];
+}
+
+/// Parses the process arguments.
+///
+/// Recognised flags: `-d <n>` stores `n` in `definitions_count.*` and
+/// `-s <n>` stores `n` in `senses_count.*`. Flags and their values may appear
+/// before or after the word. The word is the last argument that is neither a
+/// flag nor a flag value.
+///
+/// Returns a copy of the word allocated with `alloc`; the caller frees it.
+///
+/// Errors:
+/// - `error.InvalidArgs` when no arguments besides the program name are given.
+/// - `error.InvalidDefinitionCount` / `error.InvalidSenseCount` when the
+///   flag's value is missing or not a base-10 `u32`.
+/// - `error.InvalidArgCount` when only flags were given and no word.
+fn parseArgs(definitions_count: *u32, senses_count: *u32) ![]const u8 {
+    const args = try z.process.argsAlloc(alloc);
+    // Single owner of `args`: every return path below releases it exactly once.
+    defer z.process.argsFree(alloc, args);
+
+    const program_name: []const u8 = if (args.len > 0) args[0] else "zjisho";
+    if (args.len < 2) {
+        z.debug.print("usage: {s} [-d <definitions> -s <senses>] <word>\n", .{program_name});
+        return error.InvalidArgs;
+    }
+
+    var word: ?[]const u8 = null;
+    var i: usize = 1; // skip the program name
+    while (i < args.len) : (i += 1) {
+        const arg = args[i];
+        if (z.mem.eql(u8, arg, "-d")) {
+            if (i + 1 >= args.len) return error.InvalidDefinitionCount;
+            definitions_count.* = z.fmt.parseInt(u32, args[i + 1], 10) catch return error.InvalidDefinitionCount;
+            i += 1; // the value belongs to the flag, not to the word
+        } else if (z.mem.eql(u8, arg, "-s")) {
+            if (i + 1 >= args.len) return error.InvalidSenseCount;
+            senses_count.* = z.fmt.parseInt(u32, args[i + 1], 10) catch return error.InvalidSenseCount;
+            i += 1;
+        } else {
+            word = arg;
+        }
+    }
+
+    const found = word orelse {
+        z.debug.print("usage: {s} [-d <definitions> -s <senses>] <word>\n", .{program_name});
+        return error.InvalidArgCount;
+    };
+
+    // Copy before the deferred `argsFree` invalidates the argument storage.
+    return alloc.dupe(u8, found);
+}
+
+/// Entry point: parses arguments, requests the word, parses the JSON response
+/// and prints the first entry to stdout.
+///
+/// Failures in argument parsing, the request, or JSON parsing are reported on
+/// stderr and end the program without an error return. Every resource is
+/// released through `defer`, so the early-return paths clean up as well.
+pub fn main() !void {
+    // Registered first so it runs last, after all allocations have been freed.
+    defer _ = gpa.deinit();
+
+    var definitions_count: u32 = 3;
+    var senses_count: u32 = 4;
+
+    const word = parseArgs(&definitions_count, &senses_count) catch |e| {
+        z.debug.print("failed to parse arguments: {any}\n", .{e});
+        return;
+    };
+    defer alloc.free(word);
+
+    const res = requestWord(word) catch |e| {
+        z.debug.print("failed to request word: {any}\n", .{e});
+        return;
+    };
+    defer alloc.free(res);
+
+    const parsed = z.json.parseFromSlice(JishoData, alloc, res, .{ .ignore_unknown_fields = true }) catch |e| {
+        z.debug.print("failed to parse response body ({any}): {s}\n", .{ e, res });
+        return;
+    };
+    // Runs before `res` is freed (defers unwind in reverse order).
+    defer parsed.deinit();
+
+    if (parsed.value.data.len == 0) {
+        z.debug.print("response empty: not japanese?\n", .{});
+        return;
+    }
+
+    const data = &parsed.value.data[0];
+
+    // Good Enough:tm:
+    var buf: [64000]u8 = undefined;
+    // `formatDef` takes the highest index to print. Saturating subtraction
+    // keeps a user-supplied count of 0 from underflowing; it then behaves like 1.
+    const str = formatDef(&buf, data, definitions_count -| 1, senses_count -| 1) catch |e| {
+        if (e == error.NotJapanese) {
+            z.debug.print("response empty: not japanese?\n", .{});
+            return;
+        }
+        return e;
+    };
+    try z.io.getStdOut().writer().print("{s}\n", .{str});
+}
diff --git a/src/percent_encoding.zig b/src/percent_encoding.zig
new file mode 100644
index 0000000..6ada6b9
--- /dev/null
+++ b/src/percent_encoding.zig
@@ -0,0 +1,593 @@
+// credits to : https://github.com/bcrist/zig-percent-encoding
+
+/// How a character class is emitted: `raw` passes the byte through unchanged,
+/// `percent_encoded` emits it as `%XX`.
+pub const Encode_Type = enum {
+ raw,
+ percent_encoded,
+};
+/// Like `Encode_Type`, with the extra `@"+"` choice that the encoder uses to
+/// emit a space as a single '+' character.
+pub const Encode_Type_Space = enum {
+ raw,
+ percent_encoded,
+ @"+",
+};
+/// Per-character-class encoding policy. Each field names a character (or a
+/// class of characters) and says whether it is emitted raw or percent-encoded.
+pub const Encode_Options = struct {
+ alpha: Encode_Type = .raw, // [A-Za-z]
+ digits: Encode_Type = .raw, // [0-9]
+ spaces: Encode_Type_Space = .percent_encoded,
+ @"!": Encode_Type = .percent_encoded,
+ @"\"": Encode_Type = .percent_encoded,
+ @"#": Encode_Type = .percent_encoded,
+ @"$": Encode_Type = .percent_encoded,
+ @"%": Encode_Type = .percent_encoded,
+ @"&": Encode_Type = .percent_encoded,
+ @"'": Encode_Type = .percent_encoded,
+ @"(": Encode_Type = .percent_encoded,
+ @")": Encode_Type = .percent_encoded,
+ @"*": Encode_Type = .percent_encoded,
+ @"+": Encode_Type = .percent_encoded,
+ @",": Encode_Type = .percent_encoded,
+ @"-": Encode_Type = .raw,
+ @".": Encode_Type = .raw,
+ @"/": Encode_Type = .percent_encoded,
+ @":": Encode_Type = .percent_encoded,
+ @";": Encode_Type = .percent_encoded,
+ @"<": Encode_Type = .percent_encoded,
+ @"=": Encode_Type = .percent_encoded,
+ @">": Encode_Type = .percent_encoded,
+ @"?": Encode_Type = .percent_encoded,
+ @"@": Encode_Type = .percent_encoded,
+ @"[": Encode_Type = .percent_encoded,
+ @"\\": Encode_Type = .percent_encoded,
+ @"]": Encode_Type = .percent_encoded,
+ @"^": Encode_Type = .percent_encoded,
+ @"_": Encode_Type = .raw,
+ @"`": Encode_Type = .percent_encoded,
+ @"{": Encode_Type = .percent_encoded,
+ @"|": Encode_Type = .percent_encoded,
+ @"}": Encode_Type = .percent_encoded,
+ @"~": Encode_Type = .percent_encoded, // This is normally considered an unreserved character, but https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set includes it so we default to encoding it.
+ other: Encode_Type = .percent_encoded, // control chars, >= 0x80
+
+ /// Returns true when byte `c` must be percent-encoded under these options.
+ ///
+ /// `self` is comptime, so every `field != self.other` test below is a
+ /// comptime condition: a character class is only compared against `c`
+ /// when its setting differs from the `other` fallback returned at the end.
+ pub fn should_encode(comptime self: Encode_Options, c: u8) bool {
+ // Setting bit 5 maps 'A'...'Z' onto 'a'...'z', so one range covers both cases.
+ if (self.alpha != self.other) switch (c | 0b00100000) {
+ 'a'...'z' => return self.alpha != .raw,
+ else => {},
+ };
+ // NOTE(review): when `digits` differs from `other`, the characters
+ // '-', '.', '_' and '~' are decided here by the `digits` setting and
+ // their own fields are never consulted. With the defaults this leaves
+ // '~' raw, which disagrees with the comment on the `@"~"` field —
+ // confirm which behaviour is intended.
+ if (self.digits != self.other) switch (c) {
+ '0'...'9', '-', '.', '_', '~' => return self.digits != .raw,
+ else => {},
+ };
+
+ // `.@"+"` still means the space is not passed through raw, so it is
+ // treated like `.percent_encoded` for this decision.
+ const spaces: Encode_Type = if (self.spaces == .raw) .raw else .percent_encoded;
+ if (spaces != self.other and c == ' ') return spaces != .raw;
+
+ if (self.@"!" != self.other and c == '!') return self.@"!" != .raw;
+ if (self.@"\"" != self.other and c == '"') return self.@"\"" != .raw;
+ if (self.@"#" != self.other and c == '#') return self.@"#" != .raw;
+ if (self.@"$" != self.other and c == '$') return self.@"$" != .raw;
+ if (self.@"%" != self.other and c == '%') return self.@"%" != .raw;
+ if (self.@"&" != self.other and c == '&') return self.@"&" != .raw;
+ if (self.@"'" != self.other and c == '\'') return self.@"'" != .raw;
+ if (self.@"(" != self.other and c == '(') return self.@"(" != .raw;
+ if (self.@")" != self.other and c == ')') return self.@")" != .raw;
+ if (self.@"*" != self.other and c == '*') return self.@"*" != .raw;
+ if (self.@"+" != self.other and c == '+') return self.@"+" != .raw;
+ if (self.@"," != self.other and c == ',') return self.@"," != .raw;
+ if (self.@"-" != self.other and c == '-') return self.@"-" != .raw;
+ if (self.@"." != self.other and c == '.') return self.@"." != .raw;
+ if (self.@"/" != self.other and c == '/') return self.@"/" != .raw;
+ if (self.@":" != self.other and c == ':') return self.@":" != .raw;
+ if (self.@";" != self.other and c == ';') return self.@";" != .raw;
+ if (self.@"<" != self.other and c == '<') return self.@"<" != .raw;
+ if (self.@"=" != self.other and c == '=') return self.@"=" != .raw;
+ if (self.@">" != self.other and c == '>') return self.@">" != .raw;
+ if (self.@"?" != self.other and c == '?') return self.@"?" != .raw;
+ if (self.@"@" != self.other and c == '@') return self.@"@" != .raw;
+ if (self.@"[" != self.other and c == '[') return self.@"[" != .raw;
+ if (self.@"\\" != self.other and c == '\\') return self.@"\\" != .raw;
+ if (self.@"]" != self.other and c == ']') return self.@"]" != .raw;
+ if (self.@"^" != self.other and c == '^') return self.@"^" != .raw;
+ if (self.@"_" != self.other and c == '_') return self.@"_" != .raw;
+ if (self.@"`" != self.other and c == '`') return self.@"`" != .raw;
+ if (self.@"{" != self.other and c == '{') return self.@"{" != .raw;
+ if (self.@"|" != self.other and c == '|') return self.@"|" != .raw;
+ if (self.@"}" != self.other and c == '}') return self.@"}" != .raw;
+ if (self.@"~" != self.other and c == '~') return self.@"~" != .raw;
+
+ // Anything not decided above falls back to the `other` setting.
+ return self.other != .raw;
+ }
+};
+
+/// Percent-encodes `raw` into a freshly allocated slice.
+///
+/// The result is always a new allocation owned by the caller, including when
+/// `raw` is empty or needs no encoding (in which case it is a plain copy).
+/// Returns an allocator error when allocation fails.
+pub fn encode_alloc(allocator: std.mem.Allocator, raw: []const u8, comptime options: Encode_Options) ![]const u8 {
+    if (raw.len == 0) return allocator.dupe(u8, raw);
+
+    // Pass 1: measure the encoded length.
+    var measure = encode(raw, options);
+    const head = measure.next().?;
+    // A first chunk that is the whole input means nothing had to be encoded.
+    const untouched = head.ptr == raw.ptr and head.len == raw.len;
+    if (untouched) return allocator.dupe(u8, raw);
+
+    var total = head.len;
+    while (measure.next()) |part| total += part.len;
+
+    // Pass 2: encode again, copying each chunk into the exactly-sized buffer.
+    const out = try allocator.alloc(u8, total);
+    var written: usize = 0;
+    var fill = encode(raw, options);
+    while (fill.next()) |part| {
+        @memcpy(out[written..][0..part.len], part);
+        written += part.len;
+    }
+
+    return out[0..written];
+}
+// Exercises `encode_alloc`: empty input, the three space modes, characters
+// left raw by default, the `other` fallback, each punctuation character
+// under default options, and individual characters switched to raw.
+test encode_alloc {
+ try test_encode_alloc("", .{}, "");
+ try test_encode_alloc("Hellorld!", .{}, "Hellorld%21");
+ try test_encode_alloc("a b c", .{}, "a%20b%20c");
+ try test_encode_alloc("a b c", .{ .spaces = .@"+" }, "a+b+c");
+ try test_encode_alloc(" ", .{ .spaces = .percent_encoded }, "%20");
+ try test_encode_alloc("Hello World", .{ .spaces = .raw }, "Hello World");
+ try test_encode_alloc("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+ try test_encode_alloc("\x00\xFF", .{}, "%00%FF");
+ try test_encode_alloc("\x00\xFF", .{ .other = .raw }, "\x00\xFF");
+ // Default options: every punctuation character below is percent-encoded.
+ try test_encode_alloc("!!", .{}, "%21%21");
+ try test_encode_alloc("!\"", .{}, "%21%22");
+ try test_encode_alloc("!#", .{}, "%21%23");
+ try test_encode_alloc("!$", .{}, "%21%24");
+ try test_encode_alloc("!%", .{}, "%21%25");
+ try test_encode_alloc("!&", .{}, "%21%26");
+ try test_encode_alloc("!'", .{}, "%21%27");
+ try test_encode_alloc("!(", .{}, "%21%28");
+ try test_encode_alloc("!)", .{}, "%21%29");
+ try test_encode_alloc("!*", .{}, "%21%2A");
+ try test_encode_alloc("!,", .{}, "%21%2C");
+ try test_encode_alloc("!/", .{}, "%21%2F");
+ try test_encode_alloc("!:", .{}, "%21%3A");
+ try test_encode_alloc("!;", .{}, "%21%3B");
+ try test_encode_alloc("!<", .{}, "%21%3C");
+ try test_encode_alloc("!=", .{}, "%21%3D");
+ try test_encode_alloc("!>", .{}, "%21%3E");
+ try test_encode_alloc("!?", .{}, "%21%3F");
+ try test_encode_alloc("!@", .{}, "%21%40");
+ try test_encode_alloc("![", .{}, "%21%5B");
+ try test_encode_alloc("!\\", .{}, "%21%5C");
+ try test_encode_alloc("!]", .{}, "%21%5D");
+ try test_encode_alloc("!^", .{}, "%21%5E");
+ try test_encode_alloc("!`", .{}, "%21%60");
+ try test_encode_alloc("!{", .{}, "%21%7B");
+ try test_encode_alloc("!|", .{}, "%21%7C");
+ try test_encode_alloc("!}", .{}, "%21%7D");
+ // One character switched to raw: only that character passes through.
+ try test_encode_alloc("!!", .{ .@"!" = .raw }, "!!");
+ try test_encode_alloc("!#", .{ .@"#" = .raw }, "%21#");
+ try test_encode_alloc("!$", .{ .@"$" = .raw }, "%21$");
+ try test_encode_alloc("!&", .{ .@"&" = .raw }, "%21&");
+ try test_encode_alloc("!'", .{ .@"'" = .raw }, "%21'");
+ try test_encode_alloc("!(", .{ .@"(" = .raw }, "%21(");
+ try test_encode_alloc("!)", .{ .@")" = .raw }, "%21)");
+ try test_encode_alloc("!*", .{ .@"*" = .raw }, "%21*");
+ try test_encode_alloc("!,", .{ .@"," = .raw }, "%21,");
+ try test_encode_alloc("!/", .{ .@"/" = .raw }, "%21/");
+ try test_encode_alloc("!:", .{ .@":" = .raw }, "%21:");
+ try test_encode_alloc("!;", .{ .@";" = .raw }, "%21;");
+ try test_encode_alloc("!=", .{ .@"=" = .raw }, "%21=");
+ try test_encode_alloc("!?", .{ .@"?" = .raw }, "%21?");
+ try test_encode_alloc("!@", .{ .@"@" = .raw }, "%21@");
+ try test_encode_alloc("![", .{ .@"[" = .raw }, "%21[");
+ try test_encode_alloc("!]", .{ .@"]" = .raw }, "%21]");
+}
+/// Test helper: encodes `input` with `encode_alloc` under `options` and
+/// expects the result to equal `expected`.
+fn test_encode_alloc(input: []const u8, comptime options: Encode_Options, expected: []const u8) !void {
+    const allocator = std.testing.allocator;
+    const encoded = try encode_alloc(allocator, input, options);
+    defer allocator.free(encoded);
+    try std.testing.expectEqualStrings(expected, encoded);
+}
+
+/// Percent-encodes `raw`, touching `list` only when encoding is needed.
+///
+/// When `raw` is empty or needs no encoding, `raw` itself is returned and
+/// `list` is left unchanged. Otherwise the encoded bytes are appended to
+/// `list` and the appended region of `list.items` is returned.
+pub fn encode_maybe_append(list: *std.ArrayList(u8), raw: []const u8, comptime options: Encode_Options) ![]const u8 {
+    // `raw` must lie entirely outside the list's backing buffer, because
+    // appending below may reallocate that buffer.
+    const raw_begin = @intFromPtr(raw.ptr);
+    const raw_end = @intFromPtr(raw.ptr + raw.len);
+    const buffer_begin = @intFromPtr(list.items.ptr);
+    const buffer_end = @intFromPtr(list.items.ptr + list.capacity);
+    std.debug.assert(raw_begin >= buffer_end or buffer_begin >= raw_end);
+
+    if (raw.len == 0) return raw;
+
+    var parts = encode(raw, options);
+    const head = parts.next().?;
+    // A first chunk covering the whole input means nothing was encoded.
+    if (head.ptr == raw.ptr and head.len == raw.len) return head;
+
+    const start = list.items.len;
+    // `head` is appended before the iterator advances again.
+    var pending: ?[]const u8 = head;
+    while (pending) |part| : (pending = parts.next()) {
+        try list.appendSlice(part);
+    }
+
+    return list.items[start..];
+}
+// Exercises `encode_maybe_append` for inputs that need no encoding as well
+// as inputs that do, including alphanumerics forced to percent-encoding.
+test encode_maybe_append {
+ try test_encode_maybe_append("", .{}, "");
+ try test_encode_maybe_append("Hellorld!", .{}, "Hellorld%21");
+ try test_encode_maybe_append(" ", .{ .spaces = .percent_encoded }, "%20");
+ try test_encode_maybe_append("Hello World", .{ .spaces = .raw }, "Hello World");
+ try test_encode_maybe_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+ try test_encode_maybe_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{ .alpha = .percent_encoded, .digits = .percent_encoded }, "_.-%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A%30%31%32%33%34%35%36%37%38%39%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A");
+ try test_encode_maybe_append("\x00\xFF", .{}, "%00%FF");
+ try test_encode_maybe_append("\x00\xFF", .{ .other = .raw }, "\x00\xFF");
+}
+/// Test helper: runs `encode_maybe_append` against a fresh list and expects
+/// the returned slice to equal `expected`.
+fn test_encode_maybe_append(input: []const u8, comptime options: Encode_Options, expected: []const u8) !void {
+    var scratch = std.ArrayList(u8).init(std.testing.allocator);
+    defer scratch.deinit();
+
+    const encoded = try encode_maybe_append(&scratch, input, options);
+    try std.testing.expectEqualStrings(expected, encoded);
+}
+
+/// Appends the percent-encoded form of `raw` to `list`, chunk by chunk.
+/// An append failure is propagated to the caller.
+pub fn encode_append(list: *std.ArrayList(u8), raw: []const u8, comptime options: Encode_Options) !void {
+    var parts = encode(raw, options);
+    var pending = parts.next();
+    while (pending) |part| : (pending = parts.next()) {
+        try list.appendSlice(part);
+    }
+}
+// Exercises `encode_append` with the same inputs as the
+// `encode_maybe_append` test; here the list contents are checked.
+test encode_append {
+ try test_encode_append("", .{}, "");
+ try test_encode_append("Hellorld!", .{}, "Hellorld%21");
+ try test_encode_append(" ", .{ .spaces = .percent_encoded }, "%20");
+ try test_encode_append("Hello World", .{ .spaces = .raw }, "Hello World");
+ try test_encode_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+ try test_encode_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{ .alpha = .percent_encoded, .digits = .percent_encoded }, "_.-%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A%30%31%32%33%34%35%36%37%38%39%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A");
+ try test_encode_append("\x00\xFF", .{}, "%00%FF");
+ try test_encode_append("\x00\xFF", .{ .other = .raw }, "\x00\xFF");
+}
+/// Test helper: appends the encoding of `input` to a fresh list and expects
+/// the list contents to equal `expected`.
+fn test_encode_append(input: []const u8, comptime options: Encode_Options, expected: []const u8) !void {
+    var scratch = std.ArrayList(u8).init(std.testing.allocator);
+    defer scratch.deinit();
+
+    try encode_append(&scratch, input, options);
+    const produced: []const u8 = scratch.items;
+    try std.testing.expectEqualStrings(expected, produced);
+}
+
+/// Writes the percent-encoded form of `input` to `writer`, one chunk at a
+/// time. The writer's error is propagated.
+pub fn encode_writer(writer: anytype, input: []const u8, comptime options: Encode_Options) @TypeOf(writer).Error!void {
+    var parts = encode(input, options);
+    while (true) {
+        const chunk = parts.next() orelse return;
+        try writer.writeAll(chunk);
+    }
+}
+
+/// Creates a chunk iterator over the percent-encoded form of `raw`.
+/// Nothing is encoded until `next` is called on the result.
+pub fn encode(raw: []const u8, comptime options: Encode_Options) Encoder(options) {
+    return Encoder(options){ .remaining = raw };
+}
+/// Returns the iterator type that percent-encodes under `options`.
+///
+/// Each `next` call yields either a run of input bytes that need no encoding
+/// (a sub-slice of the input) or the encoding of a single byte (a slice into
+/// the iterator's own `temp` buffer, which the following call may overwrite).
+/// Emitting spaces as '+' requires that a literal '+' is percent-encoded;
+/// this is asserted at comptime.
+pub fn Encoder(comptime options: Encode_Options) type {
+    comptime if (options.spaces == .@"+") std.debug.assert(options.@"+" == .percent_encoded);
+    return struct {
+        remaining: []const u8,
+        temp: [3]u8 = "%00".*,
+
+        pub fn next(self: *@This()) ?[]const u8 {
+            const input = self.remaining;
+            if (input.len == 0) return null;
+
+            const c = input[0];
+            if (options.should_encode(c)) {
+                self.remaining = input[1..];
+
+                if (c == ' ' and options.spaces == .@"+") {
+                    // Only the last scratch byte is used for the '+' form.
+                    self.temp[2] = '+';
+                    return self.temp[2..];
+                }
+
+                // temp[0] always holds '%'; fill in the two hex digits.
+                const hex = std.fmt.bytesToHex(&[_]u8{c}, .upper);
+                @memcpy(self.temp[1..], &hex);
+                return &self.temp;
+            }
+
+            // Extend the run of pass-through bytes as far as possible.
+            var run: usize = 1;
+            while (run < input.len and !options.should_encode(input[run])) run += 1;
+
+            self.remaining = input[run..];
+            return input[0..run];
+        }
+    };
+}
+
+/// Options for the decoder.
+pub const Decode_Options = struct {
+ /// When true, a '+' in the input is decoded as a space.
+ decode_plus_as_space: bool = true,
+};
+/// Decodes `encoded` into a freshly allocated slice.
+///
+/// The result is always a new allocation owned by the caller, including when
+/// `encoded` is empty or contains nothing to decode (then it is a plain copy).
+/// Returns an allocator error when allocation fails.
+pub fn decode_alloc(allocator: std.mem.Allocator, encoded: []const u8, comptime options: Decode_Options) ![]const u8 {
+    if (encoded.len == 0) return try allocator.dupe(u8, encoded);
+
+    // Pass 1: measure the decoded length.
+    var measure = decode(encoded, options);
+    const head = measure.next().?;
+    // A first chunk that is the whole input means nothing had to be decoded.
+    const untouched = head.ptr == encoded.ptr and head.len == encoded.len;
+    if (untouched) return try allocator.dupe(u8, encoded);
+
+    var total = head.len;
+    while (measure.next()) |part| total += part.len;
+
+    // Pass 2: decode again, copying each chunk into the exactly-sized buffer.
+    const out = try allocator.alloc(u8, total);
+    var written: usize = 0;
+    var fill = decode(encoded, options);
+    while (fill.next()) |part| {
+        @memcpy(out[written..][0..part.len], part);
+        written += part.len;
+    }
+
+    return out[0..written];
+}
+// Exercises `decode_alloc`: empty input, pass-through text, '+' handling
+// with and without `decode_plus_as_space`, and each percent-encoded
+// punctuation character.
+test decode_alloc {
+ try test_decode_alloc("", .{}, "");
+ try test_decode_alloc("Hellorld!", .{}, "Hellorld!");
+ try test_decode_alloc("Hellorld%21", .{}, "Hellorld!");
+ try test_decode_alloc("a+b+c", .{}, "a b c");
+ try test_decode_alloc("+", .{ .decode_plus_as_space = false }, "+");
+ try test_decode_alloc("Hello%20World", .{}, "Hello World");
+ try test_decode_alloc("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+ try test_decode_alloc("%00%FF", .{}, "\x00\xFF");
+ try test_decode_alloc("%21%21", .{}, "!!");
+ try test_decode_alloc("%21%22", .{}, "!\"");
+ try test_decode_alloc("%21%23", .{}, "!#");
+ try test_decode_alloc("%21%24", .{}, "!$");
+ try test_decode_alloc("%21%25", .{}, "!%");
+ try test_decode_alloc("%21%26", .{}, "!&");
+ try test_decode_alloc("%21%27", .{}, "!'");
+ try test_decode_alloc("%21%28", .{}, "!(");
+ try test_decode_alloc("%21%29", .{}, "!)");
+ try test_decode_alloc("%21%2A", .{}, "!*");
+ try test_decode_alloc("%21%2C", .{}, "!,");
+ try test_decode_alloc("%21%2F", .{}, "!/");
+ try test_decode_alloc("%21%3A", .{}, "!:");
+ try test_decode_alloc("%21%3B", .{}, "!;");
+ try test_decode_alloc("%21%3C", .{}, "!<");
+ try test_decode_alloc("%21%3D", .{}, "!=");
+ try test_decode_alloc("%21%3E", .{}, "!>");
+ try test_decode_alloc("%21%3F", .{}, "!?");
+ try test_decode_alloc("%21%40", .{}, "!@");
+ try test_decode_alloc("%21%5B", .{}, "![");
+ try test_decode_alloc("%21%5C", .{}, "!\\");
+ try test_decode_alloc("%21%5D", .{}, "!]");
+ try test_decode_alloc("%21%5E", .{}, "!^");
+ try test_decode_alloc("%21%60", .{}, "!`");
+ try test_decode_alloc("%21%7B", .{}, "!{");
+ try test_decode_alloc("%21%7C", .{}, "!|");
+ try test_decode_alloc("%21%7D", .{}, "!}");
+}
+/// Test helper: decodes `input` with `decode_alloc` under `options` and
+/// expects the result to equal `expected`.
+fn test_decode_alloc(input: []const u8, comptime options: Decode_Options, expected: []const u8) !void {
+    const allocator = std.testing.allocator;
+    const decoded = try decode_alloc(allocator, input, options);
+    defer allocator.free(decoded);
+    try std.testing.expectEqualStrings(expected, decoded);
+}
+
+/// Decodes `encoded`, touching `list` only when decoding is needed.
+///
+/// When `encoded` is empty or contains nothing to decode, `encoded` itself is
+/// returned and `list` is left unchanged. Otherwise the decoded bytes are
+/// appended to `list` and the appended region of `list.items` is returned.
+pub fn decode_maybe_append(list: *std.ArrayList(u8), encoded: []const u8, comptime options: Decode_Options) ![]const u8 {
+    // `encoded` must lie entirely outside the list's backing buffer, because
+    // appending below may reallocate that buffer.
+    const input_begin = @intFromPtr(encoded.ptr);
+    const input_end = @intFromPtr(encoded.ptr + encoded.len);
+    const buffer_begin = @intFromPtr(list.items.ptr);
+    const buffer_end = @intFromPtr(list.items.ptr + list.capacity);
+    std.debug.assert(input_begin >= buffer_end or buffer_begin >= input_end);
+
+    if (encoded.len == 0) return encoded;
+
+    var parts = decode(encoded, options);
+    const head = parts.next().?;
+    // A first chunk covering the whole input means nothing was decoded.
+    if (head.ptr == encoded.ptr and head.len == encoded.len) return head;
+
+    const start = list.items.len;
+    // `head` is appended before the iterator advances again.
+    var pending: ?[]const u8 = head;
+    while (pending) |part| : (pending = parts.next()) {
+        try list.appendSlice(part);
+    }
+
+    return list.items[start..];
+}
+// Exercises `decode_maybe_append` with inputs that pass through unchanged
+// and inputs that contain `%XX` escapes or '+'.
+test decode_maybe_append {
+ try test_decode_maybe_append("", .{}, "");
+ try test_decode_maybe_append("Hellorld!", .{}, "Hellorld!");
+ try test_decode_maybe_append("Hellorld%21", .{}, "Hellorld!");
+ try test_decode_maybe_append("a+b+c", .{}, "a b c");
+ try test_decode_maybe_append("+", .{ .decode_plus_as_space = false }, "+");
+ try test_decode_maybe_append("Hello%20World", .{}, "Hello World");
+ try test_decode_maybe_append("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+}
+/// Test helper: runs `decode_maybe_append` against a fresh list and expects
+/// the returned slice to equal `expected`.
+fn test_decode_maybe_append(input: []const u8, comptime options: Decode_Options, expected: []const u8) !void {
+    var scratch = std.ArrayList(u8).init(std.testing.allocator);
+    defer scratch.deinit();
+
+    const decoded = try decode_maybe_append(&scratch, input, options);
+    try std.testing.expectEqualStrings(expected, decoded);
+}
+
+/// Appends the decoded form of `encoded` to `list`, chunk by chunk.
+/// An append failure is propagated to the caller.
+pub fn decode_append(list: *std.ArrayList(u8), encoded: []const u8, comptime options: Decode_Options) !void {
+    var parts = decode(encoded, options);
+    var pending = parts.next();
+    while (pending) |part| : (pending = parts.next()) {
+        try list.appendSlice(part);
+    }
+}
+// Exercises `decode_append` with the same inputs as the
+// `decode_maybe_append` test; here the list contents are checked.
+test decode_append {
+ try test_decode_append("", .{}, "");
+ try test_decode_append("Hellorld!", .{}, "Hellorld!");
+ try test_decode_append("Hellorld%21", .{}, "Hellorld!");
+ try test_decode_append("a+b+c", .{}, "a b c");
+ try test_decode_append("+", .{ .decode_plus_as_space = false }, "+");
+ try test_decode_append("Hello%20World", .{}, "Hello World");
+ try test_decode_append("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+}
+/// Test helper: appends the decoding of `input` to a fresh list and expects
+/// the list contents to equal `expected`.
+fn test_decode_append(input: []const u8, comptime options: Decode_Options, expected: []const u8) !void {
+    var scratch = std.ArrayList(u8).init(std.testing.allocator);
+    defer scratch.deinit();
+
+    try decode_append(&scratch, input, options);
+    const produced: []const u8 = scratch.items;
+    try std.testing.expectEqualStrings(expected, produced);
+}
+
+/// Decodes `encoded` over itself and returns the decoded prefix of the
+/// buffer. Bytes after the returned prefix are left as they were.
+pub fn decode_in_place(encoded: []u8, comptime options: Decode_Options) []const u8 {
+    // The same buffer serves as both destination and source.
+    const destination: []u8 = encoded;
+    const source: []const u8 = encoded;
+    return decode_backwards(destination, source, options);
+}
+
+/// Decodes `encoded` into the front of `output` and returns the written
+/// prefix. Each decoded chunk is copied with `std.mem.copyForwards`, which is
+/// what allows `decode_in_place` to pass the same buffer for both arguments.
+pub fn decode_backwards(output: []u8, encoded: []const u8, comptime options: Decode_Options) []const u8 {
+    var written: usize = 0;
+    var parts = decode(encoded, options);
+    while (parts.next()) |span| {
+        std.mem.copyForwards(u8, output[written..], span);
+        written += span.len;
+    }
+    return output[0..written];
+}
+
+/// Writes the decoded form of `encoded` to `writer`, one chunk at a time.
+/// The writer's error is propagated.
+pub fn decode_writer(writer: anytype, encoded: []const u8, comptime options: Decode_Options) @TypeOf(writer).Error!void {
+    var parts = decode(encoded, options);
+    while (true) {
+        const part = parts.next() orelse return;
+        try writer.writeAll(part);
+    }
+}
+
+/// Creates a chunk iterator over the decoded form of `encoded`.
+/// Nothing is decoded until `next` is called on the result.
+pub fn decode(encoded: []const u8, comptime options: Decode_Options) Decoder(options) {
+    return Decoder(options){ .remaining = encoded };
+}
+/// Returns the iterator type that decodes percent-encoded text under `options`.
+///
+/// Each `next` call yields either a run of literal input bytes (a sub-slice
+/// of the input) or one decoded byte (a slice into the iterator's own `temp`
+/// buffer, which the following call may overwrite).
+///
+/// An escape is decoded only when '%' is followed by exactly two hexadecimal
+/// digits. Anything else — a truncated escape, or non-hex characters such as
+/// the sign in "%+1" — leaves the '%' in the output as a literal byte.
+pub fn Decoder(comptime options: Decode_Options) type {
+    return struct {
+        remaining: []const u8,
+        temp: [1]u8 = undefined,
+
+        pub fn next(self: *@This()) ?[]const u8 {
+            const remaining = self.remaining;
+            if (remaining.len == 0) return null;
+
+            if (remaining[0] == '%') {
+                if (remaining.len >= 3) {
+                    if (hex_pair_to_byte(remaining[1], remaining[2])) |byte| {
+                        self.temp[0] = byte;
+                        self.remaining = remaining[3..];
+                        return &self.temp;
+                    }
+                }
+                // Malformed or truncated escape: pass the '%' through literally.
+                self.remaining = remaining[1..];
+                return remaining[0..1];
+            }
+
+            if (options.decode_plus_as_space and remaining[0] == '+') {
+                self.temp[0] = ' ';
+                self.remaining = remaining[1..];
+                return &self.temp;
+            }
+
+            // Literal run: everything up to the next character that needs decoding.
+            const stop = if (options.decode_plus_as_space)
+                std.mem.indexOfAny(u8, remaining, "%+")
+            else
+                std.mem.indexOfScalar(u8, remaining, '%');
+
+            if (stop) |end| {
+                self.remaining = remaining[end..];
+                return remaining[0..end];
+            }
+
+            self.remaining = "";
+            return remaining;
+        }
+
+        /// Combines two hex digits into a byte, or returns null when either
+        /// character is not a hex digit. Each character is checked on its own
+        /// so that signs and underscores are never accepted.
+        fn hex_pair_to_byte(high_char: u8, low_char: u8) ?u8 {
+            const high = std.fmt.charToDigit(high_char, 16) catch return null;
+            const low = std.fmt.charToDigit(low_char, 16) catch return null;
+            return (high << 4) | low;
+        }
+    };
+}
+
+/// Wraps `raw` in a formatter that percent-encodes it when printed. The
+/// format specifier selects the encoding options; see `format`.
+pub fn fmtEncoded(raw: []const u8) std.fmt.Formatter(format) {
+    return std.fmt.Formatter(format){ .data = raw };
+}
+
+/// Formatter callback behind `fmtEncoded`: percent-encodes `raw` into `writer`.
+///
+/// `fmt` is either empty (default `Encode_Options`) or one of the following
+/// prefixes followed by a list of characters:
+/// - `allow`  — the listed characters are additionally left raw.
+/// - `except` — '-', '.', '_' and '~' are set to percent-encoded first, then
+///   the listed characters are left raw.
+/// - `only`   — all punctuation is set to raw first, then the listed
+///   characters are percent-encoded.
+///
+/// The colon option is selected by ':' or by the letter 'c'.
+/// NOTE(review): 'c' is kept for backward compatibility; ':' may not be
+/// expressible inside a `std.fmt` placeholder specifier — confirm.
+/// Any other character is a compile error.
+fn format(raw: []const u8, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype) @TypeOf(writer).Error!void {
+    comptime var encode_options: Encode_Options = .{};
+
+    if (fmt.len > 0) {
+        comptime var final_fmt = fmt;
+        comptime var apply_type: Encode_Type = .raw;
+        if (comptime std.mem.startsWith(u8, fmt, "allow")) {
+            final_fmt = fmt["allow".len..];
+        } else if (comptime std.mem.startsWith(u8, fmt, "except")) {
+            final_fmt = fmt["except".len..];
+            encode_options.@"-" = .percent_encoded;
+            encode_options.@"." = .percent_encoded;
+            encode_options.@"_" = .percent_encoded;
+            encode_options.@"~" = .percent_encoded;
+        } else if (comptime std.mem.startsWith(u8, fmt, "only")) {
+            final_fmt = fmt["only".len..];
+            apply_type = .percent_encoded;
+            encode_options.@"!" = .raw;
+            encode_options.@"\"" = .raw;
+            encode_options.@"#" = .raw;
+            encode_options.@"$" = .raw;
+            encode_options.@"%" = .raw;
+            encode_options.@"&" = .raw;
+            encode_options.@"'" = .raw;
+            encode_options.@"(" = .raw;
+            encode_options.@")" = .raw;
+            encode_options.@"*" = .raw;
+            encode_options.@"+" = .raw;
+            encode_options.@"," = .raw;
+            encode_options.@"/" = .raw;
+            encode_options.@":" = .raw;
+            encode_options.@";" = .raw;
+            encode_options.@"<" = .raw;
+            encode_options.@"=" = .raw;
+            encode_options.@">" = .raw;
+            encode_options.@"?" = .raw;
+            encode_options.@"@" = .raw;
+            encode_options.@"[" = .raw;
+            encode_options.@"\\" = .raw;
+            encode_options.@"]" = .raw;
+            encode_options.@"^" = .raw;
+            encode_options.@"`" = .raw;
+            encode_options.@"{" = .raw;
+            encode_options.@"|" = .raw;
+            encode_options.@"}" = .raw;
+        } else {
+            @compileError("Format string must be empty or begin with 'allow', 'except', or 'only', but found: " ++ fmt);
+        }
+        inline for (final_fmt) |c| switch (c) {
+            '!' => encode_options.@"!" = apply_type,
+            '"' => encode_options.@"\"" = apply_type,
+            '#' => encode_options.@"#" = apply_type,
+            '$' => encode_options.@"$" = apply_type,
+            '%' => encode_options.@"%" = apply_type,
+            '&' => encode_options.@"&" = apply_type,
+            '\'' => encode_options.@"'" = apply_type,
+            '(' => encode_options.@"(" = apply_type,
+            ')' => encode_options.@")" = apply_type,
+            '*' => encode_options.@"*" = apply_type,
+            '+' => encode_options.@"+" = apply_type,
+            ',' => encode_options.@"," = apply_type,
+            '-' => encode_options.@"-" = apply_type,
+            '.' => encode_options.@"." = apply_type,
+            '/' => encode_options.@"/" = apply_type,
+            // Accept the colon itself as well as the historical 'c' alias.
+            ':', 'c' => encode_options.@":" = apply_type,
+            ';' => encode_options.@";" = apply_type,
+            '<' => encode_options.@"<" = apply_type,
+            '=' => encode_options.@"=" = apply_type,
+            '>' => encode_options.@">" = apply_type,
+            '?' => encode_options.@"?" = apply_type,
+            '@' => encode_options.@"@" = apply_type,
+            '[' => encode_options.@"[" = apply_type,
+            '\\' => encode_options.@"\\" = apply_type,
+            ']' => encode_options.@"]" = apply_type,
+            '^' => encode_options.@"^" = apply_type,
+            '_' => encode_options.@"_" = apply_type,
+            '`' => encode_options.@"`" = apply_type,
+            '{' => encode_options.@"{" = apply_type,
+            '|' => encode_options.@"|" = apply_type,
+            '}' => encode_options.@"}" = apply_type,
+            '~' => encode_options.@"~" = apply_type,
+            // `spaces` has its own enum type, so the value is mapped
+            // explicitly instead of being assigned directly.
+            ' ' => encode_options.spaces = switch (apply_type) {
+                .raw => .raw,
+                .percent_encoded => .percent_encoded,
+            },
+            else => @compileError("invalid percent encoding specifier: " ++ fmt),
+        };
+        // A raw '+' would be ambiguous with '+'-encoded spaces, so spaces
+        // fall back to percent-encoding in that case.
+        if (encode_options.@"+" == .raw and encode_options.spaces == .@"+") {
+            encode_options.spaces = .percent_encoded;
+        }
+    }
+
+    var encoder = encode(raw, encode_options);
+    while (encoder.next()) |chunk| {
+        try writer.writeAll(chunk);
+    }
+}
+
+// Exercises `fmtEncoded` with an empty specifier (default options) and with
+// the `only` and `except` prefixes.
+test fmtEncoded {
+ try test_fmtEncoded("", "", "");
+ try test_fmtEncoded("Hellorld!", "", "Hellorld%21");
+ try test_fmtEncoded(" ", "", "%20");
+ // With default options '~', '_', '.', '-' and alphanumerics are expected to pass through.
+ try test_fmtEncoded("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", "", "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+ try test_fmtEncoded("@*", "only*", "@%2A");
+ try test_fmtEncoded("[@*]", "except[]", "[%40%2A]");
+}
+/// Test helper: prints `fmtEncoded(input)` through a `{<fmt>}` placeholder
+/// and expects the rendered text to equal `expected`.
+fn test_fmtEncoded(input: []const u8, comptime fmt: []const u8, expected: []const u8) !void {
+    const allocator = std.testing.allocator;
+    const placeholder = "{" ++ fmt ++ "}";
+    const rendered = try std.fmt.allocPrint(allocator, placeholder, .{fmtEncoded(input)});
+    defer allocator.free(rendered);
+    try std.testing.expectEqualStrings(expected, rendered);
+}
+
+const std = @import("std");