push source

author: navewindre <boneyaard@gmail.com> 2025-07-18 07:17:16 +0200
committer: navewindre <boneyaard@gmail.com> 2025-07-18 07:17:16 +0200
commit: 3705cf352266bacb7eb9e40bb7921f9d4e8741d8 (patch)
tree: 066e661f4fe2f292e478387a2bb74fb60fb69665
parent: abf1d4c24bf8e5e8128e7636d5a9dc2b71d56ce8 (diff)
4 files changed, 816 insertions, 0 deletions
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..c6aaa70
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,43 @@
+const std = @import( "std" );
+
+pub fn build( b: *std.Build ) void { // wires up the install, "run", "check" and "test" steps
+  const target = b.standardTargetOptions( .{} );
+  const optimize = b.standardOptimizeOption( .{} );
+  const exe_mod = b.createModule(.{
+    .root_source_file = b.path( "src/main.zig" ),
+    .target = target,
+    .optimize = optimize,
+  });
+  // exe_mod is the single root module shared by the executable, the check build and the tests
+  const exe = b.addExecutable(.{
+    .name = "zjisho",
+    .root_module = exe_mod,
+  } );
+  // only `exe` is installed; run_cmd executes that same artifact
+  b.installArtifact( exe );
+  const run_cmd = b.addRunArtifact( exe );
+  const check_exe = b.addExecutable(.{
+    .name = "zjisho",
+    .root_module = exe_mod,
+  });
+  // "check" compiles check_exe, which is never passed to installArtifact
+  const check = b.step( "check", "check compile result" );
+  check.dependOn( &check_exe.step );
+  // make the run command wait for the install step
+  run_cmd.step.dependOn( b.getInstallStep( ));
+  // pass any arguments captured in b.args on to the program being run
+  if ( b.args ) |args| {
+    run_cmd.addArgs( args );
+  }
+  // the "run" step simply triggers run_cmd
+  const run_step = b.step( "run", "Run the app" );
+  run_step.dependOn( &run_cmd.step );
+  // unit tests are built from the same root module as the executable
+  const exe_unit_tests = b.addTest(.{
+    .root_module = exe_mod,
+  });
+  // the "test" step runs the compiled test binary
+  const run_exe_unit_tests = b.addRunArtifact( exe_unit_tests );
+  const test_step = b.step( "test", "Run unit tests" );
+  test_step.dependOn( &run_exe_unit_tests.step );
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..81a3523
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,17 @@
+.{
+    .name = .zjisho,
+    .version = "1.0.0",
+    .fingerprint = 0xa849130ea7f2b81e, // Changing this has security and trust implications.
+    .minimum_zig_version = "0.14.1",
+    .dependencies = .{
+    },
+
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        // For example...
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..62d3fa4
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,163 @@
+const z = @import("std");
+const urlencode = @import("percent_encoding.zig");
+
+var gpa = z.heap.GeneralPurposeAllocator( .{ .thread_safe = true } ){};
+const alloc = gpa.allocator();
+
+const JishoData = struct { // the subset of the search response that main() parses with std.json
+  pub const DataEntry = struct {
+    slug: []const u8,
+    japanese: []struct {
+      word: []const u8 = "", // default: std.json uses it when the key is absent,
+      reading: []const u8 = "", // instead of failing the whole parse with error.MissingField
+    },
+    senses: []struct {
+      english_definitions: [][]const u8,
+    },
+  };
+  // NOTE(review): kana-only Jisho entries appear to omit `word` - confirm against the live API.
+  meta: struct { status: u32 },
+  data: []DataEntry,
+};
+
+/// Fetches the Jisho word-search JSON for `word`; the body is allocated with `alloc` and freed by the caller.
+fn requestWord( word: []const u8 ) ![]const u8 {
+  var client = z.http.Client{ .allocator = alloc };
+  defer client.deinit();
+  // percent-encode the keyword before it is pasted into the query string
+  const encoded = urlencode.encode_alloc( alloc, word, .{} ) catch |e| {
+    z.debug.print( "error encoding word {s}\n", .{ word } ); return e;
+  };
+  defer alloc.free( encoded );
+
+  const url = try z.fmt.allocPrint( alloc, "https://jisho.org/api/v1/search/words?keyword={s}", .{ encoded } );
+  defer alloc.free( url );
+  const uri = try z.Uri.parse( url );
+
+  var buf: [4096]u8 = undefined; // handed to the client as server_header_buffer
+  var req = try client.open( .GET, uri, .{ .server_header_buffer = &buf } );
+  defer req.deinit();
+  // every stage logs its own message so the output shows which step of the exchange failed
+  req.send() catch |e| { z.debug.print( "error sending request to {s}\n", .{ url } ); return e; };
+  req.finish() catch |e| { z.debug.print( "error finishing request to {s}\n", .{ url } ); return e; };
+  req.wait() catch |e| { z.debug.print( "error waiting for response from {s}\n", .{ url } ); return e; };
+
+  if( req.response.status != .ok ) {
+    z.debug.print( "invalid response from {s}: {d}\n", .{ url, @intFromEnum( req.response.status ) } );
+    return error.InvalidResponse;
+  }
+  // 999999 is the size limit passed to readAllAlloc for the response body
+  var reader = req.reader();
+  const body = try reader.readAllAlloc( alloc, 999999 );
+
+  return body;
+}
+
+fn formatDef( buf: []u8, data: *JishoData.DataEntry, definition_count: u32, sense_count: u32 ) ![]const u8 { // writes "word（reading） - senses" into buf
+  if( data.japanese.len == 0 ) { // nothing to use as the headword
+    return error.NotJapanese;
+  }
+  // header comes from japanese[0]; later pieces are appended at buf[len..] and bufPrint errors propagate
+  const wordb = try z.fmt.bufPrint( buf, "{s}（{s}） - ", .{ data.japanese[0].word, data.japanese[0].reading } );
+  var len = wordb.len;
+  var engb: []const u8 = buf[len..];
+  for( data.senses, 0.. ) |sense, i| {
+    if( i > sense_count ) { // sense_count is the highest sense index printed; anything past it is elided
+      engb = try z.fmt.bufPrint( buf[len..], ", etc...", .{} );
+      len += engb.len;
+      break;
+    }
+    // definition_count is likewise the highest definition index printed; "/" separates definitions
+    for( sense.english_definitions, 0.. ) |definition, j| {
+      if( j > definition_count )
+        break
+      else if( j < definition_count and j < sense.english_definitions.len - 1 )
+        engb = try z.fmt.bufPrint( buf[len..], "{s}/", .{ definition } )
+      else
+        engb = try z.fmt.bufPrint( buf[len..], "{s}", .{ definition } );
+      len += engb.len;
+    }
+    // ", " goes between senses, but not after the last sense that gets printed
+    if( i < sense_count and i < data.senses.len - 1 ) {
+      engb = try z.fmt.bufPrint( buf[len..], ", ", .{} );
+      len += engb.len;
+    }
+  }
+  // the result is the written prefix of buf, so it aliases the caller's buffer
+  return buf[0..len];
+}
+
+fn parseArgs( definitions_count: *u32, senses_count: *u32 ) ![]const u8 { // returns the word, duped with `alloc`; caller frees it
+  const args = try z.process.argsAlloc( alloc );
+  defer z.process.argsFree( alloc, args ); // the only release of args: it runs on every exit path
+  if( args.len < 2 ) {
+    z.debug.print( "usage: {s} [-d <definitions> -s <senses>] <word>\n", .{args[0]} );
+    return error.InvalidArgs;
+  }
+  // -d / -s overwrite the caller's values; each one occupies two argv slots (the flag and its number)
+  var usedargs: u32 = 0;
+  for( args, 0.. ) |arg, i| {
+    if( z.mem.eql( u8, arg, "-d" ) ) {
+      if( i == args.len - 1 )
+        return error.InvalidDefinitionCount;
+
+      definitions_count.* = z.fmt.parseInt( u32, args[i + 1], 10 ) catch return error.InvalidDefinitionCount;
+      usedargs += 2;
+    }
+    if( z.mem.eql( u8, arg, "-s" ) ) {
+      if( i == args.len - 1 )
+        return error.InvalidSenseCount;
+
+      senses_count.* = z.fmt.parseInt( u32, args[i + 1], 10 ) catch return error.InvalidSenseCount;
+      usedargs += 2;
+    }
+  }
+  // besides the program name and the flag slots there must still be a word left over
+  if( args.len - usedargs < 2 ) {
+    z.debug.print( "usage: {s} [-d <definitions> -s <senses>] <word>\n", .{args[0]} );
+    return error.InvalidArgCount;
+  }
+
+  // the word is read from the last argv slot, so it has to follow any flags
+  return alloc.dupe( u8, args[args.len - 1] );
+}
+
+pub fn main() !void { // looks up the word from argv on Jisho and prints its first entry to stdout
+  defer _ = gpa.deinit(); // registered first so it runs last, after everything below has been released
+  var definitions_count: u32 = 3;
+  var senses_count: u32 = 4;
+
+  const word = parseArgs( &definitions_count, &senses_count ) catch |e| {
+    z.debug.print( "failed to parse arguments: {any}\n", .{e} );
+    return;
+  };
+  defer alloc.free( word );
+  const res = requestWord( word ) catch |e| {
+    z.debug.print( "failed to request word: {any}\n", .{e} );
+    return;
+  };
+  defer alloc.free( res );
+  const parsed = z.json.parseFromSlice( JishoData, alloc, res, .{ .ignore_unknown_fields = true } ) catch |e| {
+    z.debug.print( "failed to parse response body ({any}): {s}\n", .{e, res} );
+    return;
+  };
+  defer parsed.deinit(); // declared after res so it is torn down first; parsed strings may reference res
+  if( parsed.value.data.len == 0 ) {
+    z.debug.print( "response empty: not japanese?\n", .{} );
+    return;
+  }
+
+  const data = &parsed.value.data[0];
+
+  // Good Enough:tm:
+  var buf: [64000]u8 = undefined;
+  // formatDef takes the highest index to print; -| keeps `-d 0` / `-s 0` from overflowing the u32
+  const str = formatDef( &buf, data, definitions_count -| 1, senses_count -| 1 ) catch |e| {
+    if( e == error.NotJapanese ) {
+      z.debug.print( "response empty: not japanese?\n", .{} );
+      return;
+    }
+    return e;
+  };
+  try z.io.getStdOut().writer().print( "{s}\n", .{str} );
+}
diff --git a/src/percent_encoding.zig b/src/percent_encoding.zig
new file mode 100644
index 0000000..6ada6b9
--- /dev/null
+++ b/src/percent_encoding.zig
@@ -0,0 +1,593 @@
+// credits to : https://github.com/bcrist/zig-percent-encoding
+
+pub const Encode_Type = enum {
+    raw,
+    percent_encoded,
+};
+pub const Encode_Type_Space = enum {
+    raw,
+    percent_encoded,
+    @"+",
+};
+pub const Encode_Options = struct {
+    alpha: Encode_Type = .raw, // [A-Za-z]
+    digits: Encode_Type = .raw, // [0-9]
+    spaces: Encode_Type_Space = .percent_encoded,
+    @"!": Encode_Type = .percent_encoded,
+    @"\"": Encode_Type = .percent_encoded,
+    @"#": Encode_Type = .percent_encoded,
+    @"$": Encode_Type = .percent_encoded,
+    @"%": Encode_Type = .percent_encoded,
+    @"&": Encode_Type = .percent_encoded,
+    @"'": Encode_Type = .percent_encoded,
+    @"(": Encode_Type = .percent_encoded,
+    @")": Encode_Type = .percent_encoded,
+    @"*": Encode_Type = .percent_encoded,
+    @"+": Encode_Type = .percent_encoded,
+    @",": Encode_Type = .percent_encoded,
+    @"-": Encode_Type = .raw,
+    @".": Encode_Type = .raw,
+    @"/": Encode_Type = .percent_encoded,
+    @":": Encode_Type = .percent_encoded,
+    @";": Encode_Type = .percent_encoded,
+    @"<": Encode_Type = .percent_encoded,
+    @"=": Encode_Type = .percent_encoded,
+    @">": Encode_Type = .percent_encoded,
+    @"?": Encode_Type = .percent_encoded,
+    @"@": Encode_Type = .percent_encoded,
+    @"[": Encode_Type = .percent_encoded,
+    @"\\": Encode_Type = .percent_encoded,
+    @"]": Encode_Type = .percent_encoded,
+    @"^": Encode_Type = .percent_encoded,
+    @"_": Encode_Type = .raw,
+    @"`": Encode_Type = .percent_encoded,
+    @"{": Encode_Type = .percent_encoded,
+    @"|": Encode_Type = .percent_encoded,
+    @"}": Encode_Type = .percent_encoded,
+    @"~": Encode_Type = .percent_encoded, // This is normally considered an unreserved character, but https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set includes it so we default to encoding it.
+    other: Encode_Type = .percent_encoded, // control chars, >= 0x80
+
+    pub fn should_encode(comptime self: Encode_Options, c: u8) bool {
+        if (self.alpha != self.other) switch (c | 0b00100000) {
+            'a'...'z' => return self.alpha != .raw,
+            else => {},
+        };
+        if (self.digits != self.other) switch (c) {
+            '0'...'9', '-', '.', '_', '~' => return self.digits != .raw,
+            else => {},
+        };
+
+        const spaces: Encode_Type = if (self.spaces == .raw) .raw else .percent_encoded;
+        if (spaces != self.other and c == ' ') return spaces != .raw;
+
+        if (self.@"!" != self.other and c == '!') return self.@"!" != .raw;
+        if (self.@"\"" != self.other and c == '"') return self.@"\"" != .raw;
+        if (self.@"#" != self.other and c == '#') return self.@"#" != .raw;
+        if (self.@"$" != self.other and c == '$') return self.@"$" != .raw;
+        if (self.@"%" != self.other and c == '%') return self.@"%" != .raw;
+        if (self.@"&" != self.other and c == '&') return self.@"&" != .raw;
+        if (self.@"'" != self.other and c == '\'') return self.@"'" != .raw;
+        if (self.@"(" != self.other and c == '(') return self.@"(" != .raw;
+        if (self.@")" != self.other and c == ')') return self.@")" != .raw;
+        if (self.@"*" != self.other and c == '*') return self.@"*" != .raw;
+        if (self.@"+" != self.other and c == '+') return self.@"+" != .raw;
+        if (self.@"," != self.other and c == ',') return self.@"," != .raw;
+        if (self.@"-" != self.other and c == '-') return self.@"-" != .raw;
+        if (self.@"." != self.other and c == '.') return self.@"." != .raw;
+        if (self.@"/" != self.other and c == '/') return self.@"/" != .raw;
+        if (self.@":" != self.other and c == ':') return self.@":" != .raw;
+        if (self.@";" != self.other and c == ';') return self.@";" != .raw;
+        if (self.@"<" != self.other and c == '<') return self.@"<" != .raw;
+        if (self.@"=" != self.other and c == '=') return self.@"=" != .raw;
+        if (self.@">" != self.other and c == '>') return self.@">" != .raw;
+        if (self.@"?" != self.other and c == '?') return self.@"?" != .raw;
+        if (self.@"@" != self.other and c == '@') return self.@"@" != .raw;
+        if (self.@"[" != self.other and c == '[') return self.@"[" != .raw;
+        if (self.@"\\" != self.other and c == '\\') return self.@"\\" != .raw;
+        if (self.@"]" != self.other and c == ']') return self.@"]" != .raw;
+        if (self.@"^" != self.other and c == '^') return self.@"^" != .raw;
+        if (self.@"_" != self.other and c == '_') return self.@"_" != .raw;
+        if (self.@"`" != self.other and c == '`') return self.@"`" != .raw;
+        if (self.@"{" != self.other and c == '{') return self.@"{" != .raw;
+        if (self.@"|" != self.other and c == '|') return self.@"|" != .raw;
+        if (self.@"}" != self.other and c == '}') return self.@"}" != .raw;
+        if (self.@"~" != self.other and c == '~') return self.@"~" != .raw;
+
+        return self.other != .raw;
+    }
+};
+
+pub fn encode_alloc(allocator: std.mem.Allocator, raw: []const u8, comptime options: Encode_Options) ![]const u8 {
+    // Returns a caller-owned encoding of `raw`; input that needs no escaping comes back as a plain copy.
+    if (raw.len == 0) return allocator.dupe(u8, raw);
+    var measure = encode(raw, options);
+    const head = measure.next().?;
+    if (head.ptr == raw.ptr and head.len == raw.len) return allocator.dupe(u8, raw);
+    // First pass: add up the chunk lengths so the output can be allocated exactly once.
+    var total: usize = head.len;
+    while (measure.next()) |chunk| total += chunk.len;
+    const out = try allocator.alloc(u8, total);
+    // Second pass: restart the encoder and copy every chunk to its final position.
+    var fill = encode(raw, options);
+    var written: usize = 0;
+    while (fill.next()) |chunk| {
+        @memcpy(out[written..][0..chunk.len], chunk);
+        written += chunk.len;
+    }
+    return out;
+}
+test encode_alloc {
+    try test_encode_alloc("", .{}, "");
+    try test_encode_alloc("Hellorld!", .{}, "Hellorld%21");
+    try test_encode_alloc("a b c", .{}, "a%20b%20c");
+    try test_encode_alloc("a b c", .{ .spaces = .@"+" }, "a+b+c");
+    try test_encode_alloc(" ", .{ .spaces = .percent_encoded }, "%20");
+    try test_encode_alloc("Hello World", .{ .spaces = .raw }, "Hello World");
+    try test_encode_alloc("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_encode_alloc("\x00\xFF", .{}, "%00%FF");
+    try test_encode_alloc("\x00\xFF", .{ .other = .raw }, "\x00\xFF");
+    try test_encode_alloc("!!", .{}, "%21%21");
+    try test_encode_alloc("!\"", .{}, "%21%22");
+    try test_encode_alloc("!#", .{}, "%21%23");
+    try test_encode_alloc("!$", .{}, "%21%24");
+    try test_encode_alloc("!%", .{}, "%21%25");
+    try test_encode_alloc("!&", .{}, "%21%26");
+    try test_encode_alloc("!'", .{}, "%21%27");
+    try test_encode_alloc("!(", .{}, "%21%28");
+    try test_encode_alloc("!)", .{}, "%21%29");
+    try test_encode_alloc("!*", .{}, "%21%2A");
+    try test_encode_alloc("!,", .{}, "%21%2C");
+    try test_encode_alloc("!/", .{}, "%21%2F");
+    try test_encode_alloc("!:", .{}, "%21%3A");
+    try test_encode_alloc("!;", .{}, "%21%3B");
+    try test_encode_alloc("!<", .{}, "%21%3C");
+    try test_encode_alloc("!=", .{}, "%21%3D");
+    try test_encode_alloc("!>", .{}, "%21%3E");
+    try test_encode_alloc("!?", .{}, "%21%3F");
+    try test_encode_alloc("!@", .{}, "%21%40");
+    try test_encode_alloc("![", .{}, "%21%5B");
+    try test_encode_alloc("!\\", .{}, "%21%5C");
+    try test_encode_alloc("!]", .{}, "%21%5D");
+    try test_encode_alloc("!^", .{}, "%21%5E");
+    try test_encode_alloc("!`", .{}, "%21%60");
+    try test_encode_alloc("!{", .{}, "%21%7B");
+    try test_encode_alloc("!|", .{}, "%21%7C");
+    try test_encode_alloc("!}", .{}, "%21%7D");
+    try test_encode_alloc("!!", .{ .@"!" = .raw }, "!!");
+    try test_encode_alloc("!#", .{ .@"#" = .raw }, "%21#");
+    try test_encode_alloc("!$", .{ .@"$" = .raw }, "%21$");
+    try test_encode_alloc("!&", .{ .@"&" = .raw }, "%21&");
+    try test_encode_alloc("!'", .{ .@"'" = .raw }, "%21'");
+    try test_encode_alloc("!(", .{ .@"(" = .raw }, "%21(");
+    try test_encode_alloc("!)", .{ .@")" = .raw }, "%21)");
+    try test_encode_alloc("!*", .{ .@"*" = .raw }, "%21*");
+    try test_encode_alloc("!,", .{ .@"," = .raw }, "%21,");
+    try test_encode_alloc("!/", .{ .@"/" = .raw }, "%21/");
+    try test_encode_alloc("!:", .{ .@":" = .raw }, "%21:");
+    try test_encode_alloc("!;", .{ .@";" = .raw }, "%21;");
+    try test_encode_alloc("!=", .{ .@"=" = .raw }, "%21=");
+    try test_encode_alloc("!?", .{ .@"?" = .raw }, "%21?");
+    try test_encode_alloc("!@", .{ .@"@" = .raw }, "%21@");
+    try test_encode_alloc("![", .{ .@"[" = .raw }, "%21[");
+    try test_encode_alloc("!]", .{ .@"]" = .raw }, "%21]");
+}
+fn test_encode_alloc(input: []const u8, comptime options: Encode_Options, expected: []const u8) !void {
+    const actual = try encode_alloc(std.testing.allocator, input, options);
+    defer std.testing.allocator.free(actual);
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+pub fn encode_maybe_append(list: *std.ArrayList(u8), raw: []const u8, comptime options: Encode_Options) ![]const u8 {
+    // `raw` must not reference the list's backing buffer, since it might be reallocated in this function.
+    std.debug.assert(@intFromPtr(raw.ptr) >= @intFromPtr(list.items.ptr + list.capacity)
+                  or @intFromPtr(list.items.ptr) >= @intFromPtr(raw.ptr + raw.len));
+
+    if (raw.len == 0) return raw;
+
+    var iter = encode(raw, options);
+    const first = iter.next().?;
+    if (first.len == raw.len and first.ptr == raw.ptr) return first;
+
+    const prefix_length = list.items.len;
+    try list.appendSlice(first);
+    while (iter.next()) |part| {
+        try list.appendSlice(part);
+    }
+
+    return list.items[prefix_length..];
+}
+test encode_maybe_append {
+    try test_encode_maybe_append("", .{}, "");
+    try test_encode_maybe_append("Hellorld!", .{}, "Hellorld%21");
+    try test_encode_maybe_append(" ", .{ .spaces = .percent_encoded }, "%20");
+    try test_encode_maybe_append("Hello World", .{ .spaces = .raw }, "Hello World");
+    try test_encode_maybe_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_encode_maybe_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{ .alpha = .percent_encoded, .digits = .percent_encoded }, "_.-%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A%30%31%32%33%34%35%36%37%38%39%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A");
+    try test_encode_maybe_append("\x00\xFF", .{}, "%00%FF");
+    try test_encode_maybe_append("\x00\xFF", .{ .other = .raw }, "\x00\xFF");
+}
+fn test_encode_maybe_append(input: []const u8, comptime options: Encode_Options, expected: []const u8) !void {
+    var temp = std.ArrayList(u8).init(std.testing.allocator);
+    defer temp.deinit();
+
+    const actual = try encode_maybe_append(&temp, input, options);
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+pub fn encode_append(list: *std.ArrayList(u8), raw: []const u8, comptime options: Encode_Options) !void {
+    var iter = encode(raw, options);
+    while (iter.next()) |part| {
+        try list.appendSlice(part);
+    }
+}
+test encode_append {
+    try test_encode_append("", .{}, "");
+    try test_encode_append("Hellorld!", .{}, "Hellorld%21");
+    try test_encode_append(" ", .{ .spaces = .percent_encoded }, "%20");
+    try test_encode_append("Hello World", .{ .spaces = .raw }, "Hello World");
+    try test_encode_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_encode_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{ .alpha = .percent_encoded, .digits = .percent_encoded }, "_.-%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A%30%31%32%33%34%35%36%37%38%39%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A");
+    try test_encode_append("\x00\xFF", .{}, "%00%FF");
+    try test_encode_append("\x00\xFF", .{ .other = .raw }, "\x00\xFF");
+}
+fn test_encode_append(input: []const u8, comptime options: Encode_Options, expected: []const u8) !void {
+    var temp = std.ArrayList(u8).init(std.testing.allocator);
+    defer temp.deinit();
+
+    try encode_append(&temp, input, options);
+    try std.testing.expectEqualStrings(expected, temp.items);
+}
+
+pub fn encode_writer(writer: anytype, input: []const u8, comptime options: Encode_Options) @TypeOf(writer).Error!void {
+    var encoder = encode(input, options);
+    while (encoder.next()) |chunk| {
+        try writer.writeAll(chunk);
+    }
+}
+
+pub fn encode(raw: []const u8, comptime options: Encode_Options) Encoder(options) {
+    return .{ .remaining = raw };
+}
+pub fn Encoder(comptime options: Encode_Options) type { // iterator type yielding the encoded input chunk by chunk
+    comptime if (options.spaces == .@"+") std.debug.assert(options.@"+" == .percent_encoded); // spaces-as-'+' is only accepted when a literal '+' is itself escaped
+    return struct {
+        remaining: []const u8,
+        temp: [3]u8 = "%00".*, // scratch for one escape; byte 0 always stays '%'
+        // Returns the next chunk, or null once the input is used up; escape chunks point into `temp` until the next call.
+        pub fn next(self: *@This()) ?[]const u8 {
+            const remaining = self.remaining;
+            if (remaining.len == 0) return null;
+            // look for the first byte that has to be escaped
+            for (0.., remaining) |i, c| {
+                const should_encode = options.should_encode(c);
+                // raw bytes in front of it are returned first, as a slice of the input itself
+                if (should_encode) {
+                    if (i > 0) {
+                        self.remaining = remaining[i..];
+                        return remaining[0..i];
+                    }
+                    var temp: []u8 = &self.temp;
+                    if (c == ' ' and options.spaces == .@"+") {
+                        temp = temp[2..]; // one-byte chunk that reuses the last scratch byte
+                        temp[0] = '+';
+                    } else {
+                        @memcpy(temp[1..], &std.fmt.bytesToHex(&[_]u8{c}, .upper)); // two uppercase hex digits after the '%'
+                    }
+                    self.remaining = remaining[1..];
+                    return temp;
+                }
+            }
+            // nothing needed escaping: the rest of the input is returned unchanged
+            self.remaining = "";
+            return remaining;
+        }
+    };
+}
+
+pub const Decode_Options = struct {
+    decode_plus_as_space: bool = true,
+};
+pub fn decode_alloc(allocator: std.mem.Allocator, encoded: []const u8, comptime options: Decode_Options) ![]const u8 {
+    // Returns a caller-owned decoding of `encoded`; input with nothing to decode comes back as a plain copy.
+    if (encoded.len == 0) return try allocator.dupe(u8, encoded);
+    var measure = decode(encoded, options);
+    const head = measure.next().?;
+    if (head.ptr == encoded.ptr and head.len == encoded.len) return try allocator.dupe(u8, encoded);
+    // First pass: add up the chunk lengths so the output can be allocated exactly once.
+    var total: usize = head.len;
+    while (measure.next()) |chunk| total += chunk.len;
+    const out = try allocator.alloc(u8, total);
+    // Second pass: restart the decoder and copy every chunk to its final position.
+    var fill = decode(encoded, options);
+    var written: usize = 0;
+    while (fill.next()) |chunk| {
+        @memcpy(out[written..][0..chunk.len], chunk);
+        written += chunk.len;
+    }
+    return out;
+}
+test decode_alloc {
+    try test_decode_alloc("", .{}, "");
+    try test_decode_alloc("Hellorld!", .{}, "Hellorld!");
+    try test_decode_alloc("Hellorld%21", .{}, "Hellorld!");
+    try test_decode_alloc("a+b+c", .{}, "a b c");
+    try test_decode_alloc("+", .{ .decode_plus_as_space = false }, "+");
+    try test_decode_alloc("Hello%20World", .{}, "Hello World");
+    try test_decode_alloc("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_decode_alloc("%00%FF", .{}, "\x00\xFF");
+    try test_decode_alloc("%21%21", .{}, "!!");
+    try test_decode_alloc("%21%22", .{}, "!\"");
+    try test_decode_alloc("%21%23", .{}, "!#");
+    try test_decode_alloc("%21%24", .{}, "!$");
+    try test_decode_alloc("%21%25", .{}, "!%");
+    try test_decode_alloc("%21%26", .{}, "!&");
+    try test_decode_alloc("%21%27", .{}, "!'");
+    try test_decode_alloc("%21%28", .{}, "!(");
+    try test_decode_alloc("%21%29", .{}, "!)");
+    try test_decode_alloc("%21%2A", .{}, "!*");
+    try test_decode_alloc("%21%2C", .{}, "!,");
+    try test_decode_alloc("%21%2F", .{}, "!/");
+    try test_decode_alloc("%21%3A", .{}, "!:");
+    try test_decode_alloc("%21%3B", .{}, "!;");
+    try test_decode_alloc("%21%3C", .{}, "!<");
+    try test_decode_alloc("%21%3D", .{}, "!=");
+    try test_decode_alloc("%21%3E", .{}, "!>");
+    try test_decode_alloc("%21%3F", .{}, "!?");
+    try test_decode_alloc("%21%40", .{}, "!@");
+    try test_decode_alloc("%21%5B", .{}, "![");
+    try test_decode_alloc("%21%5C", .{}, "!\\");
+    try test_decode_alloc("%21%5D", .{}, "!]");
+    try test_decode_alloc("%21%5E", .{}, "!^");
+    try test_decode_alloc("%21%60", .{}, "!`");
+    try test_decode_alloc("%21%7B", .{}, "!{");
+    try test_decode_alloc("%21%7C", .{}, "!|");
+    try test_decode_alloc("%21%7D", .{}, "!}");
+}
+fn test_decode_alloc(input: []const u8, comptime options: Decode_Options, expected: []const u8) !void {
+    const actual = try decode_alloc(std.testing.allocator, input, options);
+    defer std.testing.allocator.free(actual);
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+pub fn decode_maybe_append(list: *std.ArrayList(u8), encoded: []const u8, comptime options: Decode_Options) ![]const u8 {
+    // `encoded` must not reference the list's backing buffer, since it might be reallocated in this function.
+    std.debug.assert(@intFromPtr(encoded.ptr) >= @intFromPtr(list.items.ptr + list.capacity)
+                  or @intFromPtr(list.items.ptr) >= @intFromPtr(encoded.ptr + encoded.len));
+
+    if (encoded.len == 0) return encoded;
+
+    var iter = decode(encoded, options);
+    const first = iter.next().?;
+    if (first.len == encoded.len and first.ptr == encoded.ptr) return first;
+
+    const prefix_length = list.items.len;
+    try list.appendSlice(first);
+    while (iter.next()) |part| {
+        try list.appendSlice(part);
+    }
+
+    return list.items[prefix_length..];
+}
+test decode_maybe_append {
+    try test_decode_maybe_append("", .{}, "");
+    try test_decode_maybe_append("Hellorld!", .{}, "Hellorld!");
+    try test_decode_maybe_append("Hellorld%21", .{}, "Hellorld!");
+    try test_decode_maybe_append("a+b+c", .{}, "a b c");
+    try test_decode_maybe_append("+", .{ .decode_plus_as_space = false }, "+");
+    try test_decode_maybe_append("Hello%20World", .{}, "Hello World");
+    try test_decode_maybe_append("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+}
+fn test_decode_maybe_append(input: []const u8, comptime options: Decode_Options, expected: []const u8) !void {
+    var temp = std.ArrayList(u8).init(std.testing.allocator);
+    defer temp.deinit();
+
+    const actual = try decode_maybe_append(&temp, input, options);
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+pub fn decode_append(list: *std.ArrayList(u8), encoded: []const u8, comptime options: Decode_Options) !void {
+    var iter = decode(encoded, options);
+    while (iter.next()) |part| {
+        try list.appendSlice(part);
+    }
+}
+test decode_append {
+    try test_decode_append("", .{}, "");
+    try test_decode_append("Hellorld!", .{}, "Hellorld!");
+    try test_decode_append("Hellorld%21", .{}, "Hellorld!");
+    try test_decode_append("a+b+c", .{}, "a b c");
+    try test_decode_append("+", .{ .decode_plus_as_space = false }, "+");
+    try test_decode_append("Hello%20World", .{}, "Hello World");
+    try test_decode_append("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+}
+fn test_decode_append(input: []const u8, comptime options: Decode_Options, expected: []const u8) !void {
+    var temp = std.ArrayList(u8).init(std.testing.allocator);
+    defer temp.deinit();
+
+    try decode_append(&temp, input, options);
+    try std.testing.expectEqualStrings(expected, temp.items);
+}
+
+pub fn decode_in_place(encoded: []u8, comptime options: Decode_Options) []const u8 {
+    return decode_backwards(encoded, encoded, options);
+}
+
+pub fn decode_backwards(output: []u8, encoded: []const u8, comptime options: Decode_Options) []const u8 {
+    var remaining = output;
+    var iter = decode(encoded, options);
+    while (iter.next()) |span| {
+        std.mem.copyForwards(u8, remaining, span);
+        remaining = remaining[span.len..];
+    }
+    return output[0 .. output.len - remaining.len];
+}
+
+pub fn decode_writer(writer: anytype, encoded: []const u8, comptime options: Decode_Options) @TypeOf(writer).Error!void {
+    var iter = decode(encoded, options);
+    while (iter.next()) |part| {
+        try writer.writeAll(part);
+    }
+}
+
+pub fn decode(encoded: []const u8, comptime options: Decode_Options) Decoder(options) {
+    return .{ .remaining = encoded };
+}
+pub fn Decoder(comptime options: Decode_Options) type { // iterator type yielding the decoded input chunk by chunk
+    return struct {
+        remaining: []const u8,
+        temp: [1]u8 = undefined, // holds the one byte produced by a "%XX" escape or a '+'
+        // Returns the next chunk, or null at end of input; one-byte decoded chunks point into `temp` until the next call.
+        pub fn next(self: *@This()) ?[]const u8 {
+            const remaining = self.remaining;
+            if (remaining.len == 0) return null;
+
+            if (remaining[0] == '%') {
+                // An escape is '%' plus exactly two hex digits. parseInt on its own also accepts a
+                // leading sign ("%+1", "%-0"), so both bytes are checked before it is called.
+                if (remaining.len >= 3 and std.ascii.isHex(remaining[1]) and std.ascii.isHex(remaining[2])) {
+                    self.temp[0] = std.fmt.parseInt(u8, remaining[1..3], 16) catch unreachable;
+                    self.remaining = remaining[3..];
+                    return &self.temp;
+                } else {
+                    // malformed or truncated escape: the '%' is passed through literally
+                    self.remaining = remaining[1..];
+                    return remaining[0..1];
+                }
+            } else if (options.decode_plus_as_space and remaining[0] == '+') {
+                self.temp[0] = ' ';
+                self.remaining = remaining[1..];
+                return &self.temp;
+            }
+            // otherwise return the literal run up to the next byte that may need decoding
+            if (options.decode_plus_as_space) {
+                if (std.mem.indexOfAny(u8, remaining, "%+")) |end| {
+                    self.remaining = remaining[end..];
+                    return remaining[0..end];
+                }
+            } else {
+                if (std.mem.indexOfScalar(u8, remaining, '%')) |end| {
+                    self.remaining = remaining[end..];
+                    return remaining[0..end];
+                }
+            }
+            // nothing left to decode: the remainder is returned unchanged
+            self.remaining = "";
+            return remaining;
+        }
+    };
+}
+
+pub fn fmtEncoded(raw: []const u8) std.fmt.Formatter(format) {
+    return .{ .data = raw };
+}
+
+fn format(raw: []const u8, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype) @TypeOf(writer).Error!void {
+    comptime var encode_options: Encode_Options = .{};
+    // Formatter callback for fmtEncoded: `fmt` is "" (defaults) or "allow"/"except"/"only" followed by the characters the mode applies to.
+    if (fmt.len > 0) {
+        comptime var final_fmt = fmt;
+        comptime var apply_type: Encode_Type = .raw;
+        if (comptime std.mem.startsWith(u8, fmt, "allow")) {
+            final_fmt = fmt["allow".len..]; // defaults stay; listed characters become raw
+        } else if (comptime std.mem.startsWith(u8, fmt, "except")) {
+            final_fmt = fmt["except".len..]; // these four are switched to percent_encoded; listed characters become raw
+            encode_options.@"-" = .percent_encoded;
+            encode_options.@"." = .percent_encoded;
+            encode_options.@"_" = .percent_encoded;
+            encode_options.@"~" = .percent_encoded;
+        } else if (comptime std.mem.startsWith(u8, fmt, "only")) {
+            final_fmt = fmt["only".len..]; // the options below become raw; listed characters become percent_encoded
+            apply_type = .percent_encoded;
+            encode_options.@"!" = .raw;
+            encode_options.@"\"" = .raw;
+            encode_options.@"#" = .raw;
+            encode_options.@"$" = .raw;
+            encode_options.@"%" = .raw;
+            encode_options.@"&" = .raw;
+            encode_options.@"'" = .raw;
+            encode_options.@"(" = .raw;
+            encode_options.@")" = .raw;
+            encode_options.@"*" = .raw;
+            encode_options.@"+" = .raw;
+            encode_options.@"," = .raw;
+            encode_options.@"/" = .raw;
+            encode_options.@":" = .raw;
+            encode_options.@";" = .raw;
+            encode_options.@"<" = .raw;
+            encode_options.@"=" = .raw;
+            encode_options.@">" = .raw;
+            encode_options.@"?" = .raw;
+            encode_options.@"@" = .raw;
+            encode_options.@"[" = .raw;
+            encode_options.@"\\" = .raw;
+            encode_options.@"]" = .raw;
+            encode_options.@"^" = .raw;
+            encode_options.@"`" = .raw;
+            encode_options.@"{" = .raw;
+            encode_options.@"|" = .raw;
+            encode_options.@"}" = .raw;
+        } else {
+            @compileError("Format string must be empty or begin with 'allow', 'except', or 'only', but found: " ++ fmt);
+        }
+        inline for (final_fmt) |c| switch (c) {
+            '!' => encode_options.@"!" = apply_type,
+            '"' => encode_options.@"\"" = apply_type,
+            '#' => encode_options.@"#" = apply_type,
+            '$' => encode_options.@"$" = apply_type,
+            '%' => encode_options.@"%" = apply_type,
+            '&' => encode_options.@"&" = apply_type,
+            '\'' => encode_options.@"'" = apply_type,
+            '(' => encode_options.@"(" = apply_type,
+            ')' => encode_options.@")" = apply_type,
+            '*' => encode_options.@"*" = apply_type,
+            '+' => encode_options.@"+" = apply_type,
+            ',' => encode_options.@"," = apply_type,
+            '-' => encode_options.@"-" = apply_type,
+            '.' => encode_options.@"." = apply_type,
+            '/' => encode_options.@"/" = apply_type,
+            'c' => encode_options.@":" = apply_type, // NOTE(review): 'c' presumably stands in for ':' because ':' ends a std.fmt specifier - confirm
+            ';' => encode_options.@";" = apply_type,
+            '<' => encode_options.@"<" = apply_type,
+            '=' => encode_options.@"=" = apply_type,
+            '>' => encode_options.@">" = apply_type,
+            '?' => encode_options.@"?" = apply_type,
+            '@' => encode_options.@"@" = apply_type,
+            '[' => encode_options.@"[" = apply_type,
+            '\\' => encode_options.@"\\" = apply_type,
+            ']' => encode_options.@"]" = apply_type,
+            '^' => encode_options.@"^" = apply_type,
+            '_' => encode_options.@"_" = apply_type,
+            '`' => encode_options.@"`" = apply_type,
+            '{' => encode_options.@"{" = apply_type,
+            '|' => encode_options.@"|" = apply_type,
+            '}' => encode_options.@"}" = apply_type,
+            '~' => encode_options.@"~" = apply_type,
+            ' ' => encode_options.spaces = if (apply_type == .raw) .raw else .percent_encoded, // `spaces` has its own enum type, so apply_type is translated instead of assigned
+            else => @compileError("invalid percent encoding specifier: " ++ fmt),
+        };
+        if (encode_options.@"+" == .raw and encode_options.spaces == .@"+") {
+            encode_options.spaces = .percent_encoded;
+        }
+    }
+    // stream the encoded chunks straight to the writer; nothing is allocated here
+    var encoder = encode(raw, encode_options);
+    while (encoder.next()) |chunk| {
+        try writer.writeAll(chunk);
+    }
+}
+
+test fmtEncoded {
+    try test_fmtEncoded("", "", "");
+    try test_fmtEncoded("Hellorld!", "", "Hellorld%21");
+    try test_fmtEncoded(" ", "", "%20");
+    try test_fmtEncoded("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", "", "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_fmtEncoded("@*", "only*", "@%2A");
+    try test_fmtEncoded("[@*]", "except[]", "[%40%2A]");
+}
+fn test_fmtEncoded(input: []const u8, comptime fmt: []const u8, expected: []const u8) !void {
+    const temp = try std.fmt.allocPrint(std.testing.allocator, "{" ++ fmt ++ "}", .{ fmtEncoded(input) });
+    defer std.testing.allocator.free(temp);
+    try std.testing.expectEqualStrings(expected, temp);
+}
+
+const std = @import("std");
author	navewindre <boneyaard@gmail.com>	2025-07-18 07:17:16 +0200
committer	navewindre <boneyaard@gmail.com>	2025-07-18 07:17:16 +0200
commit	3705cf352266bacb7eb9e40bb7921f9d4e8741d8 (patch)
tree	066e661f4fe2f292e478387a2bb74fb60fb69665
parent	abf1d4c24bf8e5e8128e7636d5a9dc2b71d56ce8 (diff)