diff options
| author | navewindre <boneyaard@gmail.com> | 2025-07-18 07:17:16 +0200 |
|---|---|---|
| committer | navewindre <boneyaard@gmail.com> | 2025-07-18 07:17:16 +0200 |
| commit | 3705cf352266bacb7eb9e40bb7921f9d4e8741d8 (patch) | |
| tree | 066e661f4fe2f292e478387a2bb74fb60fb69665 | |
| parent | abf1d4c24bf8e5e8128e7636d5a9dc2b71d56ce8 (diff) | |
push source
| -rw-r--r-- | build.zig | 43 | ||||
| -rw-r--r-- | build.zig.zon | 17 | ||||
| -rw-r--r-- | src/main.zig | 163 | ||||
| -rw-r--r-- | src/percent_encoding.zig | 593 |
4 files changed, 816 insertions, 0 deletions
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..c6aaa70
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,43 @@
+const std = @import( "std" );
+
+pub fn build( b: *std.Build ) void {
+    const target = b.standardTargetOptions( .{} );
+    const optimize = b.standardOptimizeOption( .{} );
+    const exe_mod = b.createModule(.{
+        .root_source_file = b.path( "src/main.zig" ),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    const exe = b.addExecutable(.{
+        .name = "zjisho",
+        .root_module = exe_mod,
+    } );
+
+    b.installArtifact( exe );
+    const run_cmd = b.addRunArtifact( exe );
+    const check_exe = b.addExecutable(.{
+        .name = "zjisho",
+        .root_module = exe_mod,
+    });
+
+    const check = b.step( "check", "check compile result" );
+    check.dependOn( &check_exe.step );
+
+    run_cmd.step.dependOn( b.getInstallStep( ));
+
+    if ( b.args ) |args| {
+        run_cmd.addArgs( args );
+    }
+
+    const run_step = b.step( "run", "Run the app" );
+    run_step.dependOn( &run_cmd.step );
+
+    const exe_unit_tests = b.addTest(.{
+        .root_module = exe_mod,
+    });
+
+    const run_exe_unit_tests = b.addRunArtifact( exe_unit_tests );
+    const test_step = b.step( "test", "Run unit tests" );
+    test_step.dependOn( &run_exe_unit_tests.step );
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..81a3523
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,17 @@
+.{
+    .name = .zjisho,
+    .version = "1.0.0",
+    .fingerprint = 0xa849130ea7f2b81e, // Changing this has security and trust implications.
+    .minimum_zig_version = "0.14.1",
+    .dependencies = .{
+    },
+
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        // For example...
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..62d3fa4
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,163 @@
+const z = @import("std");
+const urlencode = @import("percent_encoding.zig");
+
+// Process-wide allocator; `main` calls `gpa.deinit()` on exit.
+var gpa = z.heap.GeneralPurposeAllocator( .{ .thread_safe = true } ){};
+const alloc = gpa.allocator();
+
+/// The parts of the search response this tool reads. `main` parses with
+/// `.ignore_unknown_fields = true`, so every other key is skipped.
+const JishoData = struct {
+    pub const DataEntry = struct {
+        slug: []const u8,
+        japanese: []struct {
+            // NOTE(review): defaults let an entry that lacks `word` or `reading`
+            // parse instead of failing the whole response with a missing-field
+            // error. Presumably the API omits `word` for kana-only entries;
+            // confirm against real responses.
+            word: []const u8 = "",
+            reading: []const u8 = "",
+        },
+        senses: []struct {
+            english_definitions: [][]const u8
+        },
+    };
+
+    meta: struct { status: u32 },
+    data: []DataEntry,
+};
+
+/// Percent-encodes `word`, GETs the jisho.org word search for it and returns
+/// the raw response body.
+///
+/// The returned slice is allocated with `alloc` and freed by the caller.
+/// Returns `error.InvalidResponse` for any status other than 200; encoding,
+/// allocation and HTTP errors are propagated after a message on stderr.
+fn requestWord( word: []const u8 ) ![]const u8 {
+    var client = z.http.Client{ .allocator = alloc };
+    defer client.deinit();
+
+    const encoded = urlencode.encode_alloc( alloc, word, .{} ) catch |e| {
+        z.debug.print( "error encoding word {s}\n", .{ word } ); return e;
+    };
+    defer alloc.free( encoded );
+
+    const url = try z.fmt.allocPrint( alloc, "https://jisho.org/api/v1/search/words?keyword={s}", .{ encoded } );
+    defer alloc.free( url );
+    const uri = try z.Uri.parse( url );
+
+    var buf: [4096]u8 = undefined;
+    var req = try client.open( .GET, uri, .{ .server_header_buffer = &buf } );
+    defer req.deinit();
+
+    // One message per phase, so a failure report says which step broke.
+    req.send() catch |e| { z.debug.print( "error sending request to {s}\n", .{ url } ); return e; };
+    req.finish() catch |e| { z.debug.print( "error finishing request to {s}\n", .{ url } ); return e; };
+    req.wait() catch |e| { z.debug.print( "error waiting for response from {s}\n", .{ url } ); return e; };
+
+    if( req.response.status != .ok ) {
+        z.debug.print( "invalid response from {s}: {d}\n", .{ url, @intFromEnum( req.response.status ) } );
+        return error.InvalidResponse;
+    }
+
+    var reader = req.reader();
+    return reader.readAllAlloc( alloc, 999999 );
+}
+
+/// Renders one entry into `buf` as `word(reading) - def/def, def/def, etc...`
+/// and returns the written prefix of `buf`.
+///
+/// `definition_count` and `sense_count` are the highest *index* to print
+/// (count minus one, as passed by `main`). Senses beyond `sense_count` are
+/// replaced by a single ", etc...".
+/// Returns `error.NotJapanese` when the entry has no `japanese` items and
+/// `error.NoSpaceLeft` (from `bufPrint`) when `buf` is too small.
+fn formatDef( buf: []u8, data: *JishoData.DataEntry, definition_count: u32, sense_count: u32 ) ![]const u8 {
+    if( data.japanese.len == 0 ) {
+        return error.NotJapanese;
+    }
+
+    // Skip the empty half when the entry carries only a word or only a reading.
+    const head = data.japanese[0];
+    const wordb = if( head.word.len == 0 )
+        try z.fmt.bufPrint( buf, "{s} - ", .{ head.reading } )
+    else if( head.reading.len == 0 )
+        try z.fmt.bufPrint( buf, "{s} - ", .{ head.word } )
+    else
+        try z.fmt.bufPrint( buf, "{s}({s}) - ", .{ head.word, head.reading } );
+
+    var len = wordb.len;
+    for( data.senses, 0.. ) |sense, i| {
+        if( i > sense_count ) {
+            len += ( try z.fmt.bufPrint( buf[len..], ", etc...", .{} ) ).len;
+            break;
+        }
+
+        for( sense.english_definitions, 0.. ) |definition, j| {
+            if( j > definition_count )
+                break;
+
+            // "/" only between definitions that will both be printed.
+            const more = j < definition_count and j < sense.english_definitions.len - 1;
+            const sep: []const u8 = if( more ) "/" else "";
+            len += ( try z.fmt.bufPrint( buf[len..], "{s}{s}", .{ definition, sep } ) ).len;
+        }
+
+        if( i < sense_count and i < data.senses.len - 1 ) {
+            len += ( try z.fmt.bufPrint( buf[len..], ", ", .{} ) ).len;
+        }
+    }
+
+    return buf[0..len];
+}
+
+/// Reads `[-d <definitions> -s <senses>] <word>` from the process arguments.
+///
+/// Writes the parsed counts through the out-pointers (left untouched when a
+/// flag is absent) and returns a copy of the last argument, allocated with
+/// `alloc` and freed by the caller. Counts must be at least 1 because `main`
+/// converts them to a zero-based maximum index.
+fn parseArgs( definitions_count: *u32, senses_count: *u32 ) ![]const u8 {
+    const args = try z.process.argsAlloc( alloc );
+    // The only release of `args`; it covers every return path below.
+    defer z.process.argsFree( alloc, args );
+
+    const exe_name: []const u8 = if( args.len > 0 ) args[0] else "zjisho";
+    if( args.len < 2 ) {
+        z.debug.print( "usage: {s} [-d <definitions> -s <senses>] <word>\n", .{ exe_name } );
+        return error.InvalidArgs;
+    }
+
+    // argv slots taken by flags: each flag uses two, itself and its value.
+    var usedargs: usize = 0;
+    for( args[1..], 1.. ) |arg, i| {
+        if( z.mem.eql( u8, arg, "-d" ) ) {
+            if( i == args.len - 1 )
+                return error.InvalidDefinitionCount;
+
+            const count = z.fmt.parseInt( u32, args[i + 1], 10 ) catch return error.InvalidDefinitionCount;
+            if( count == 0 )
+                return error.InvalidDefinitionCount;
+            definitions_count.* = count;
+            usedargs += 2;
+        } else if( z.mem.eql( u8, arg, "-s" ) ) {
+            if( i == args.len - 1 )
+                return error.InvalidSenseCount;
+
+            const count = z.fmt.parseInt( u32, args[i + 1], 10 ) catch return error.InvalidSenseCount;
+            if( count == 0 )
+                return error.InvalidSenseCount;
+            senses_count.* = count;
+            usedargs += 2;
+        }
+    }
+
+    // Besides the program name there must be one argument left for the word.
+    if( args.len - usedargs < 2 ) {
+        z.debug.print( "usage: {s} [-d <definitions> -s <senses>] <word>\n", .{ exe_name } );
+        return error.InvalidArgCount;
+    }
+
+    return alloc.dupe( u8, args[args.len - 1] );
+}
+
+/// Looks the word up and prints the first result on stdout. Failures are
+/// reported on stderr and the process still exits normally.
+pub fn main() !void {
+    // Registered first so it runs last, after everything below is released.
+    defer _ = gpa.deinit();
+
+    var definitions_count: u32 = 3;
+    var senses_count: u32 = 4;
+
+    const word = parseArgs( &definitions_count, &senses_count ) catch |e| {
+        z.debug.print( "failed to parse arguments: {any}\n", .{e} );
+        return;
+    };
+    defer alloc.free( word );
+
+    const res = requestWord( word ) catch |e| {
+        z.debug.print( "failed to request word: {any}\n", .{e} );
+        return;
+    };
+    defer alloc.free( res );
+
+    const parsed = z.json.parseFromSlice( JishoData, alloc, res, .{ .ignore_unknown_fields = true } ) catch |e| {
+        z.debug.print( "failed to parse response body ({any}): {s}\n", .{e, res} );
+        return;
+    };
+    // Runs before `res` is freed (defers are LIFO).
+    defer parsed.deinit();
+
+    if( parsed.value.data.len == 0 ) {
+        z.debug.print( "response empty: not japanese?\n", .{} );
+        return;
+    }
+
+    const data = &parsed.value.data[0];
+
+    // Good Enough:tm:
+    var buf: [64000]u8 = undefined;
+    // parseArgs rejects 0 and the defaults are non-zero, so `- 1` cannot underflow.
+    const str = formatDef( &buf, data, definitions_count - 1, senses_count - 1 ) catch |e| {
+        if( e == error.NotJapanese ) {
+            z.debug.print( "response empty: not japanese?\n", .{} );
+            return;
+        }
+        return e;
+    };
+    try z.io.getStdOut().writer().print( "{s}\n", .{str} );
+}
diff --git a/src/percent_encoding.zig b/src/percent_encoding.zig
new file mode 100644
index 0000000..6ada6b9
--- /dev/null
+++ b/src/percent_encoding.zig
@@ 
-0,0 +1,593 @@
+// credits to : https://github.com/bcrist/zig-percent-encoding
+
+/// How one character class is written by the encoder.
+pub const Encode_Type = enum {
+    raw,
+    percent_encoded,
+};
+/// Like `Encode_Type`, plus the form-style `+` replacement for spaces.
+pub const Encode_Type_Space = enum {
+    raw,
+    percent_encoded,
+    @"+",
+};
+/// Per-character encoding policy. Every field is consulted independently:
+/// `alpha` covers only letters, `digits` only `0`-`9`, and each punctuation
+/// character has its own field.
+pub const Encode_Options = struct {
+    alpha: Encode_Type = .raw, // [A-Za-z]
+    digits: Encode_Type = .raw, // [0-9]
+    spaces: Encode_Type_Space = .percent_encoded,
+    @"!": Encode_Type = .percent_encoded,
+    @"\"": Encode_Type = .percent_encoded,
+    @"#": Encode_Type = .percent_encoded,
+    @"$": Encode_Type = .percent_encoded,
+    @"%": Encode_Type = .percent_encoded,
+    @"&": Encode_Type = .percent_encoded,
+    @"'": Encode_Type = .percent_encoded,
+    @"(": Encode_Type = .percent_encoded,
+    @")": Encode_Type = .percent_encoded,
+    @"*": Encode_Type = .percent_encoded,
+    @"+": Encode_Type = .percent_encoded,
+    @",": Encode_Type = .percent_encoded,
+    @"-": Encode_Type = .raw,
+    @".": Encode_Type = .raw,
+    @"/": Encode_Type = .percent_encoded,
+    @":": Encode_Type = .percent_encoded,
+    @";": Encode_Type = .percent_encoded,
+    @"<": Encode_Type = .percent_encoded,
+    @"=": Encode_Type = .percent_encoded,
+    @">": Encode_Type = .percent_encoded,
+    @"?": Encode_Type = .percent_encoded,
+    @"@": Encode_Type = .percent_encoded,
+    @"[": Encode_Type = .percent_encoded,
+    @"\\": Encode_Type = .percent_encoded,
+    @"]": Encode_Type = .percent_encoded,
+    @"^": Encode_Type = .percent_encoded,
+    @"_": Encode_Type = .raw,
+    @"`": Encode_Type = .percent_encoded,
+    @"{": Encode_Type = .percent_encoded,
+    @"|": Encode_Type = .percent_encoded,
+    @"}": Encode_Type = .percent_encoded,
+    @"~": Encode_Type = .percent_encoded, // This is normally considered an unreserved character, but https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set includes it so we default to encoding it.
+    other: Encode_Type = .percent_encoded, // control chars, >= 0x80
+
+    /// Returns true when byte `c` must not be copied through unchanged.
+    ///
+    /// Each class is only tested when its setting differs from `other`; any
+    /// byte that matches no tested class gets the `other` policy. Because
+    /// `self` is comptime-known, the untested branches are compiled out.
+    pub fn should_encode(comptime self: Encode_Options, c: u8) bool {
+        // Setting bit 5 folds 'A'...'Z' onto 'a'...'z'; no other byte lands in that range.
+        if (self.alpha != self.other) switch (c | 0b00100000) {
+            'a'...'z' => return self.alpha != .raw,
+            else => {},
+        };
+        // Digits only. '-', '.', '_' and '~' are decided by their own fields
+        // below; matching them here would make those four options dead.
+        if (self.digits != self.other) switch (c) {
+            '0'...'9' => return self.digits != .raw,
+            else => {},
+        };
+
+        // `.@"+"` still means "do not emit the space as-is".
+        const spaces: Encode_Type = if (self.spaces == .raw) .raw else .percent_encoded;
+        if (spaces != self.other and c == ' ') return spaces != .raw;
+
+        if (self.@"!" != self.other and c == '!') return self.@"!" != .raw;
+        if (self.@"\"" != self.other and c == '"') return self.@"\"" != .raw;
+        if (self.@"#" != self.other and c == '#') return self.@"#" != .raw;
+        if (self.@"$" != self.other and c == '$') return self.@"$" != .raw;
+        if (self.@"%" != self.other and c == '%') return self.@"%" != .raw;
+        if (self.@"&" != self.other and c == '&') return self.@"&" != .raw;
+        if (self.@"'" != self.other and c == '\'') return self.@"'" != .raw;
+        if (self.@"(" != self.other and c == '(') return self.@"(" != .raw;
+        if (self.@")" != self.other and c == ')') return self.@")" != .raw;
+        if (self.@"*" != self.other and c == '*') return self.@"*" != .raw;
+        if (self.@"+" != self.other and c == '+') return self.@"+" != .raw;
+        if (self.@"," != self.other and c == ',') return self.@"," != .raw;
+        if (self.@"-" != self.other and c == '-') return self.@"-" != .raw;
+        if (self.@"." != self.other and c == '.') return self.@"." != .raw;
+        if (self.@"/" != self.other and c == '/') return self.@"/" != .raw;
+        if (self.@":" != self.other and c == ':') return self.@":" != .raw;
+        if (self.@";" != self.other and c == ';') return self.@";" != .raw;
+        if (self.@"<" != self.other and c == '<') return self.@"<" != .raw;
+        if (self.@"=" != self.other and c == '=') return self.@"=" != .raw;
+        if (self.@">" != self.other and c == '>') return self.@">" != .raw;
+        if (self.@"?" != self.other and c == '?') return self.@"?" != .raw;
+        if (self.@"@" != self.other and c == '@') return self.@"@" != .raw;
+        if (self.@"[" != self.other and c == '[') return self.@"[" != .raw;
+        if (self.@"\\" != self.other and c == '\\') return self.@"\\" != .raw;
+        if (self.@"]" != self.other and c == ']') return self.@"]" != .raw;
+        if (self.@"^" != self.other and c == '^') return self.@"^" != .raw;
+        if (self.@"_" != self.other and c == '_') return self.@"_" != .raw;
+        if (self.@"`" != self.other and c == '`') return self.@"`" != .raw;
+        if (self.@"{" != self.other and c == '{') return self.@"{" != .raw;
+        if (self.@"|" != self.other and c == '|') return self.@"|" != .raw;
+        if (self.@"}" != self.other and c == '}') return self.@"}" != .raw;
+        if (self.@"~" != self.other and c == '~') return self.@"~" != .raw;
+
+        return self.other != .raw;
+    }
+};
+
+/// Returns a newly allocated, encoded copy of `raw`. The caller owns the
+/// result even when nothing needed encoding.
+pub fn encode_alloc(allocator: std.mem.Allocator, raw: []const u8, comptime options: Encode_Options) ![]const u8 {
+    if (raw.len == 0) return allocator.dupe(u8, raw);
+
+    var iter = encode(raw, options);
+    const first = iter.next().?;
+    // A single chunk that is `raw` itself means no byte was encoded.
+    if (first.len == raw.len and first.ptr == raw.ptr) return allocator.dupe(u8, raw);
+
+    // First pass measures, second pass fills an exactly sized buffer.
+    var len = first.len;
+    while (iter.next()) |part| len += part.len;
+
+    var result = std.ArrayListUnmanaged(u8).initBuffer(try allocator.alloc(u8, len));
+
+    iter = encode(raw, options);
+    while (iter.next()) |part| {
+        result.appendSliceAssumeCapacity(part);
+    }
+
+    return result.items;
+}
+test encode_alloc {
+    try test_encode_alloc("", .{}, "");
+    try test_encode_alloc("Hellorld!", .{}, "Hellorld%21");
+    try test_encode_alloc("a b c", .{}, "a%20b%20c");
+    try test_encode_alloc("a b c", .{ .spaces = .@"+" }, "a+b+c");
+    try test_encode_alloc(" ", .{ .spaces = .percent_encoded }, "%20");
+    try test_encode_alloc("Hello World", .{ .spaces = .raw }, "Hello World");
+    try test_encode_alloc("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_encode_alloc("\x00\xFF", .{}, "%00%FF");
+    try test_encode_alloc("\x00\xFF", .{ .other = .raw }, "\x00\xFF");
+    try test_encode_alloc("!!", .{}, "%21%21");
+    try test_encode_alloc("!\"", .{}, "%21%22");
+    try test_encode_alloc("!#", .{}, "%21%23");
+    try test_encode_alloc("!$", .{}, "%21%24");
+    try test_encode_alloc("!%", .{}, "%21%25");
+    try test_encode_alloc("!&", .{}, "%21%26");
+    try test_encode_alloc("!'", .{}, "%21%27");
+    try test_encode_alloc("!(", .{}, "%21%28");
+    try test_encode_alloc("!)", .{}, "%21%29");
+    try test_encode_alloc("!*", .{}, "%21%2A");
+    try test_encode_alloc("!,", .{}, "%21%2C");
+    try test_encode_alloc("!/", .{}, "%21%2F");
+    try test_encode_alloc("!:", .{}, "%21%3A");
+    try test_encode_alloc("!;", .{}, "%21%3B");
+    try test_encode_alloc("!<", .{}, "%21%3C");
+    try test_encode_alloc("!=", .{}, "%21%3D");
+    try test_encode_alloc("!>", .{}, "%21%3E");
+    try test_encode_alloc("!?", .{}, "%21%3F");
+    try test_encode_alloc("!@", .{}, "%21%40");
+    try test_encode_alloc("![", .{}, "%21%5B");
+    try test_encode_alloc("!\\", .{}, "%21%5C");
+    try test_encode_alloc("!]", .{}, "%21%5D");
+    try test_encode_alloc("!^", .{}, "%21%5E");
+    try test_encode_alloc("!`", .{}, "%21%60");
+    try test_encode_alloc("!{", .{}, "%21%7B");
+    try test_encode_alloc("!|", .{}, "%21%7C");
+    try test_encode_alloc("!}", .{}, "%21%7D");
+    try test_encode_alloc("!!", .{ .@"!" = .raw }, "!!");
+    try test_encode_alloc("!#", .{ .@"#" = .raw }, "%21#");
+    try test_encode_alloc("!$", .{ .@"$" = .raw }, "%21$");
+    try test_encode_alloc("!&", .{ .@"&" = .raw }, "%21&");
+    try test_encode_alloc("!'", .{ .@"'" = .raw }, "%21'");
+    try test_encode_alloc("!(", .{ .@"(" = .raw }, "%21(");
+    try test_encode_alloc("!)", .{ .@")" = .raw }, "%21)");
+    try test_encode_alloc("!*", .{ .@"*" = .raw }, "%21*");
+    try test_encode_alloc("!,", .{ .@"," = .raw }, "%21,");
+    try test_encode_alloc("!/", .{ .@"/" = .raw }, "%21/");
+    try test_encode_alloc("!:", .{ .@":" = .raw }, "%21:");
+    try test_encode_alloc("!;", .{ .@";" = .raw }, "%21;");
+    try test_encode_alloc("!=", .{ .@"=" = .raw }, "%21=");
+    try test_encode_alloc("!?", .{ .@"?" = .raw }, "%21?");
+    try test_encode_alloc("!@", .{ .@"@" = .raw }, "%21@");
+    try test_encode_alloc("![", .{ .@"[" = .raw }, "%21[");
+    try test_encode_alloc("!]", .{ .@"]" = .raw }, "%21]");
+    // '-', '.', '_' and '~' follow their own options, not `digits`.
+    try test_encode_alloc("~", .{}, "%7E");
+    try test_encode_alloc("~", .{ .@"~" = .raw }, "~");
+    try test_encode_alloc("a-b", .{ .@"-" = .percent_encoded }, "a%2Db");
+    try test_encode_alloc("a.b", .{ .@"." = .percent_encoded }, "a%2Eb");
+    try test_encode_alloc("a_b", .{ .@"_" = .percent_encoded }, "a%5Fb");
+}
+fn test_encode_alloc(input: []const u8, comptime options: Encode_Options, expected: []const u8) !void {
+    const actual = try encode_alloc(std.testing.allocator, input, options);
+    defer std.testing.allocator.free(actual);
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+/// Encodes `raw`, allocating only when something actually changes.
+///
+/// Returns `raw` itself when no byte needs encoding. Otherwise the encoded
+/// bytes are appended to `list` and the returned slice points into
+/// `list.items`, so it is invalidated by the next growth of `list`.
+pub fn encode_maybe_append(list: *std.ArrayList(u8), raw: []const u8, comptime options: Encode_Options) ![]const u8 {
+    // `raw` must not reference the list's backing buffer, since it might be reallocated in this function.
+    std.debug.assert(@intFromPtr(raw.ptr) >= @intFromPtr(list.items.ptr + list.capacity)
+        or @intFromPtr(list.items.ptr) >= @intFromPtr(raw.ptr + raw.len));
+
+    if (raw.len == 0) return raw;
+
+    var iter = encode(raw, options);
+    const first = iter.next().?;
+    if (first.len == raw.len and first.ptr == raw.ptr) return first;
+
+    const prefix_length = list.items.len;
+    try list.appendSlice(first);
+    while (iter.next()) |part| {
+        try list.appendSlice(part);
+    }
+
+    return list.items[prefix_length..];
+}
+test encode_maybe_append {
+    try test_encode_maybe_append("", .{}, "");
+    try test_encode_maybe_append("Hellorld!", .{}, "Hellorld%21");
+    try test_encode_maybe_append(" ", .{ .spaces = .percent_encoded }, "%20");
+    try test_encode_maybe_append("Hello World", .{ .spaces = .raw }, "Hello World");
+    try test_encode_maybe_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_encode_maybe_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{ .alpha = .percent_encoded, .digits = .percent_encoded }, "_.-%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A%30%31%32%33%34%35%36%37%38%39%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A");
+    try test_encode_maybe_append("\x00\xFF", .{}, "%00%FF");
+    try test_encode_maybe_append("\x00\xFF", .{ .other = .raw }, "\x00\xFF");
+}
+fn test_encode_maybe_append(input: []const u8, comptime options: Encode_Options, expected: []const u8) !void {
+    var temp = std.ArrayList(u8).init(std.testing.allocator);
+    defer temp.deinit();
+
+    const actual = try encode_maybe_append(&temp, input, options);
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+/// Appends the encoded form of `raw` to `list`.
+pub fn encode_append(list: *std.ArrayList(u8), raw: []const u8, comptime options: Encode_Options) !void {
+    var iter = encode(raw, options);
+    while (iter.next()) |part| {
+        try list.appendSlice(part);
+    }
+}
+test encode_append {
+    try test_encode_append("", .{}, "");
+    try test_encode_append("Hellorld!", .{}, "Hellorld%21");
+    try test_encode_append(" ", .{ .spaces = .percent_encoded }, "%20");
+    try test_encode_append("Hello World", .{ .spaces = .raw }, "Hello World");
+    try test_encode_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_encode_append("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{ .alpha = .percent_encoded, .digits = .percent_encoded }, "_.-%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A%30%31%32%33%34%35%36%37%38%39%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A");
+    try test_encode_append("\x00\xFF", .{}, "%00%FF");
+    try test_encode_append("\x00\xFF", .{ .other = .raw }, "\x00\xFF");
+}
+fn test_encode_append(input: []const u8, comptime options: Encode_Options, expected: []const u8) !void {
+    var temp = std.ArrayList(u8).init(std.testing.allocator);
+    defer temp.deinit();
+
+    try encode_append(&temp, input, options);
+    try std.testing.expectEqualStrings(expected, temp.items);
+}
+
+/// Writes the encoded form of `input` to `writer` without allocating.
+pub fn encode_writer(writer: anytype, input: []const u8, comptime options: Encode_Options) @TypeOf(writer).Error!void {
+    var encoder = encode(input, options);
+    while (encoder.next()) |chunk| {
+        try writer.writeAll(chunk);
+    }
+}
+
+/// Returns an iterator over the encoded chunks of `raw`.
+pub fn encode(raw: []const u8, comptime options: Encode_Options) Encoder(options) {
+    return .{ .remaining = raw };
+}
+/// Chunk iterator: `next` yields either a run of bytes copied from the input
+/// or one encoded byte held in the iterator's own `temp` buffer. An encoded
+/// chunk is only valid until the following `next` call.
+pub fn Encoder(comptime options: Encode_Options) type {
+    // Emitting ' ' as '+' is ambiguous unless a literal '+' is itself encoded.
+    comptime if (options.spaces == .@"+") std.debug.assert(options.@"+" == .percent_encoded);
+    return struct {
+        remaining: []const u8,
+        temp: [3]u8 = "%00".*,
+
+        pub fn next(self: *@This()) ?[]const u8 {
+            const remaining = self.remaining;
+            if (remaining.len == 0) return null;
+
+            for (0.., remaining) |i, c| {
+                const should_encode = options.should_encode(c);
+
+                if (should_encode) {
+                    // Flush the raw run before this byte; it is handled on the next call.
+                    if (i > 0) {
+                        self.remaining = remaining[i..];
+                        return remaining[0..i];
+                    }
+                    var temp: []u8 = &self.temp;
+                    if (c == ' ' and options.spaces == .@"+") {
+                        // Reuse the last slot so temp[0] keeps its '%'.
+                        temp = temp[2..];
+                        temp[0] = '+';
+                    } else {
+                        @memcpy(temp[1..], &std.fmt.bytesToHex(&[_]u8{c}, .upper));
+                    }
+                    self.remaining = remaining[1..];
+                    return temp;
+                }
+            }
+
+            self.remaining = "";
+            return remaining;
+        }
+    };
+}
+
+pub const Decode_Options = struct {
+    decode_plus_as_space: bool = true,
+};
+/// Returns a newly allocated, decoded copy of `encoded`. The caller owns the
+/// result even when nothing needed decoding.
+pub fn decode_alloc(allocator: std.mem.Allocator, encoded: []const u8, comptime options: Decode_Options) ![]const u8 {
+    if (encoded.len == 0) return try allocator.dupe(u8, encoded);
+
+    var iter = decode(encoded, options);
+    const first = iter.next().?;
+    if (first.len == encoded.len and first.ptr == encoded.ptr) return try allocator.dupe(u8, encoded);
+
+    // First pass measures, second pass fills an exactly sized buffer.
+    var len = first.len;
+    while (iter.next()) |part| len += part.len;
+
+    var result = std.ArrayListUnmanaged(u8).initBuffer(try allocator.alloc(u8, len));
+
+    iter = decode(encoded, options);
+    while (iter.next()) |part| {
+        result.appendSliceAssumeCapacity(part);
+    }
+
+    return result.items;
+}
+test decode_alloc {
+    try test_decode_alloc("", .{}, "");
+    try test_decode_alloc("Hellorld!", .{}, "Hellorld!");
+    try test_decode_alloc("Hellorld%21", .{}, "Hellorld!");
+    try test_decode_alloc("a+b+c", .{}, "a b c");
+    try test_decode_alloc("+", .{ .decode_plus_as_space = false }, "+");
+    try test_decode_alloc("Hello%20World", .{}, "Hello World");
+    try test_decode_alloc("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_decode_alloc("%00%FF", .{}, "\x00\xFF");
+    try test_decode_alloc("%21%21", .{}, "!!");
+    try test_decode_alloc("%21%22", .{}, "!\"");
+    try test_decode_alloc("%21%23", .{}, "!#");
+    try test_decode_alloc("%21%24", .{}, "!$");
+    try test_decode_alloc("%21%25", .{}, "!%");
+    try test_decode_alloc("%21%26", .{}, "!&");
+    try test_decode_alloc("%21%27", .{}, "!'");
+    try test_decode_alloc("%21%28", .{}, "!(");
+    try test_decode_alloc("%21%29", .{}, "!)");
+    try test_decode_alloc("%21%2A", .{}, "!*");
+    try test_decode_alloc("%21%2C", .{}, "!,");
+    try test_decode_alloc("%21%2F", .{}, "!/");
+    try test_decode_alloc("%21%3A", .{}, "!:");
+    try test_decode_alloc("%21%3B", .{}, "!;");
+    try test_decode_alloc("%21%3C", .{}, "!<");
+    try test_decode_alloc("%21%3D", .{}, "!=");
+    try test_decode_alloc("%21%3E", .{}, "!>");
+    try test_decode_alloc("%21%3F", .{}, "!?");
+    try test_decode_alloc("%21%40", .{}, "!@");
+    try test_decode_alloc("%21%5B", .{}, "![");
+    try test_decode_alloc("%21%5C", .{}, "!\\");
+    try test_decode_alloc("%21%5D", .{}, "!]");
+    try test_decode_alloc("%21%5E", .{}, "!^");
+    try test_decode_alloc("%21%60", .{}, "!`");
+    try test_decode_alloc("%21%7B", .{}, "!{");
+    try test_decode_alloc("%21%7C", .{}, "!|");
+    try test_decode_alloc("%21%7D", .{}, "!}");
+    // Only two hex digits form an escape; a sign is not a digit.
+    try test_decode_alloc("%+1", .{ .decode_plus_as_space = false }, "%+1");
+    try test_decode_alloc("%-0", .{}, "%-0");
+    try test_decode_alloc("%zz", .{}, "%zz");
+    try test_decode_alloc("%2", .{}, "%2");
+    try test_decode_alloc("%2a", .{}, "*");
+}
+fn test_decode_alloc(input: []const u8, comptime options: Decode_Options, expected: []const u8) !void {
+    const actual = try decode_alloc(std.testing.allocator, input, options);
+    defer std.testing.allocator.free(actual);
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+/// Decodes `encoded`, allocating only when something actually changes.
+///
+/// Returns `encoded` itself when it contains nothing to decode. Otherwise the
+/// decoded bytes are appended to `list` and the returned slice points into
+/// `list.items`, so it is invalidated by the next growth of `list`.
+pub fn decode_maybe_append(list: *std.ArrayList(u8), encoded: []const u8, comptime options: Decode_Options) ![]const u8 {
+    // `encoded` must not reference the list's backing buffer, since it might be reallocated in this function.
+    std.debug.assert(@intFromPtr(encoded.ptr) >= @intFromPtr(list.items.ptr + list.capacity)
+        or @intFromPtr(list.items.ptr) >= @intFromPtr(encoded.ptr + encoded.len));
+
+    if (encoded.len == 0) return encoded;
+
+    var iter = decode(encoded, options);
+    const first = iter.next().?;
+    if (first.len == encoded.len and first.ptr == encoded.ptr) return first;
+
+    const prefix_length = list.items.len;
+    try list.appendSlice(first);
+    while (iter.next()) |part| {
+        try list.appendSlice(part);
+    }
+
+    return list.items[prefix_length..];
+}
+test decode_maybe_append {
+    try test_decode_maybe_append("", .{}, "");
+    try test_decode_maybe_append("Hellorld!", .{}, "Hellorld!");
+    try test_decode_maybe_append("Hellorld%21", .{}, "Hellorld!");
+    try test_decode_maybe_append("a+b+c", .{}, "a b c");
+    try test_decode_maybe_append("+", .{ .decode_plus_as_space = false }, "+");
+    try test_decode_maybe_append("Hello%20World", .{}, "Hello World");
+    try test_decode_maybe_append("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+}
+fn test_decode_maybe_append(input: []const u8, comptime options: Decode_Options, expected: []const u8) !void {
+    var temp = std.ArrayList(u8).init(std.testing.allocator);
+    defer temp.deinit();
+
+    const actual = try decode_maybe_append(&temp, input, options);
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+/// Appends the decoded form of `encoded` to `list`.
+pub fn decode_append(list: *std.ArrayList(u8), encoded: []const u8, comptime options: Decode_Options) !void {
+    var iter = decode(encoded, options);
+    while (iter.next()) |part| {
+        try list.appendSlice(part);
+    }
+}
+test decode_append {
+    try test_decode_append("", .{}, "");
+    try test_decode_append("Hellorld!", .{}, "Hellorld!");
+    try test_decode_append("Hellorld%21", .{}, "Hellorld!");
+    try test_decode_append("a+b+c", .{}, "a b c");
+    try test_decode_append("+", .{ .decode_plus_as_space = false }, "+");
+    try test_decode_append("Hello%20World", .{}, "Hello World");
+    try test_decode_append("~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", .{}, "~_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+}
+fn test_decode_append(input: []const u8, comptime options: Decode_Options, expected: []const u8) !void {
+    var temp = std.ArrayList(u8).init(std.testing.allocator);
+    defer temp.deinit();
+
+    try decode_append(&temp, input, options);
+    try std.testing.expectEqualStrings(expected, temp.items);
+}
+
+/// Decodes `encoded` over itself and returns the decoded prefix.
+pub fn decode_in_place(encoded: []u8, comptime options: Decode_Options) []const u8 {
+    return decode_backwards(encoded, encoded, options);
+}
+
+/// Writes the decoded form of `encoded` to the front of `output` and returns
+/// the written prefix. `output` may be the same buffer as `encoded`: decoding
+/// never produces more bytes than it consumes, and spans are copied forwards.
+pub fn decode_backwards(output: []u8, encoded: []const u8, comptime options: Decode_Options) []const u8 {
+    var remaining = output;
+    var iter = decode(encoded, options);
+    while (iter.next()) |span| {
+        std.mem.copyForwards(u8, remaining, span);
+        remaining = remaining[span.len..];
+    }
+    return output[0 .. output.len - remaining.len];
+}
+
+/// Writes the decoded form of `encoded` to `writer` without allocating.
+pub fn decode_writer(writer: anytype, encoded: []const u8, comptime options: Decode_Options) @TypeOf(writer).Error!void {
+    var iter = decode(encoded, options);
+    while (iter.next()) |part| {
+        try writer.writeAll(part);
+    }
+}
+
+/// Returns an iterator over the decoded chunks of `encoded`.
+pub fn decode(encoded: []const u8, comptime options: Decode_Options) Decoder(options) {
+    return .{ .remaining = encoded };
+}
+
+/// Value of the two hex digits `hi` and `lo`, or null when either byte is not
+/// a hex digit. Unlike `std.fmt.parseInt`, no sign character is accepted.
+fn hex_pair(hi: u8, lo: u8) ?u8 {
+    const h = std.fmt.charToDigit(hi, 16) catch return null;
+    const l = std.fmt.charToDigit(lo, 16) catch return null;
+    return (h << 4) | l;
+}
+
+/// Chunk iterator: `next` yields either a run of bytes copied from the input
+/// or one decoded byte held in the iterator's own `temp` buffer. A decoded
+/// chunk is only valid until the following `next` call. A '%' that is not
+/// followed by two hex digits is passed through as a literal '%'.
+pub fn Decoder(comptime options: Decode_Options) type {
+    return struct {
+        remaining: []const u8,
+        temp: [1]u8 = undefined,
+
+        pub fn next(self: *@This()) ?[]const u8 {
+            const remaining = self.remaining;
+            if (remaining.len == 0) return null;
+
+            if (remaining[0] == '%') {
+                if (remaining.len >= 3) {
+                    if (hex_pair(remaining[1], remaining[2])) |byte| {
+                        self.temp[0] = byte;
+                        self.remaining = remaining[3..];
+                        return &self.temp;
+                    }
+                }
+                // Truncated or malformed escape: emit the '%' unchanged.
+                self.remaining = remaining[1..];
+                return remaining[0..1];
+            } else if (options.decode_plus_as_space and remaining[0] == '+') {
+                self.temp[0] = ' ';
+                self.remaining = remaining[1..];
+                return &self.temp;
+            }
+
+            // Plain run: everything up to the next byte that needs decoding.
+            if (options.decode_plus_as_space) {
+                if (std.mem.indexOfAny(u8, remaining, "%+")) |end| {
+                    self.remaining = remaining[end..];
+                    return remaining[0..end];
+                }
+            } else {
+                if (std.mem.indexOfScalar(u8, remaining, '%')) |end| {
+                    self.remaining = remaining[end..];
+                    return remaining[0..end];
+                }
+            }
+
+            self.remaining = "";
+            return remaining;
+        }
+    };
+}
+
+/// Wraps `raw` so that `std.fmt` prints it percent-encoded.
+///
+/// The format specifier is empty (default options) or one of `allow`,
+/// `except` or `only`, followed by the characters it applies to. A colon is
+/// written as `c`:
+///  - `allow<chars>`: default options, with the listed characters left raw.
+///  - `except<chars>`: every punctuation character encoded except the listed ones.
+///  - `only<chars>`: every punctuation character raw except the listed ones.
+pub fn fmtEncoded(raw: []const u8) std.fmt.Formatter(format) {
+    return .{ .data = raw };
+}
+
+fn format(raw: []const u8, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype) @TypeOf(writer).Error!void {
+    comptime var encode_options: Encode_Options = .{};
+
+    if (fmt.len > 0) {
+        comptime var final_fmt = fmt;
+        comptime var apply_type: Encode_Type = .raw;
+        if (comptime std.mem.startsWith(u8, fmt, "allow")) {
+            final_fmt = fmt["allow".len..];
+        } else if (comptime std.mem.startsWith(u8, fmt, "except")) {
+            final_fmt = fmt["except".len..];
+            encode_options.@"-" = .percent_encoded;
+            encode_options.@"." = .percent_encoded;
+            encode_options.@"_" = .percent_encoded;
+            encode_options.@"~" = .percent_encoded;
+        } else if (comptime std.mem.startsWith(u8, fmt, "only")) {
+            final_fmt = fmt["only".len..];
+            apply_type = .percent_encoded;
+            encode_options.@"!" = .raw;
+            encode_options.@"\"" = .raw;
+            encode_options.@"#" = .raw;
+            encode_options.@"$" = .raw;
+            encode_options.@"%" = .raw;
+            encode_options.@"&" = .raw;
+            encode_options.@"'" = .raw;
+            encode_options.@"(" = .raw;
+            encode_options.@")" = .raw;
+            encode_options.@"*" = .raw;
+            encode_options.@"+" = .raw;
+            encode_options.@"," = .raw;
+            encode_options.@"/" = .raw;
+            encode_options.@":" = .raw;
+            encode_options.@";" = .raw;
+            encode_options.@"<" = .raw;
+            encode_options.@"=" = .raw;
+            encode_options.@">" = .raw;
+            encode_options.@"?" = .raw;
+            encode_options.@"@" = .raw;
+            encode_options.@"[" = .raw;
+            encode_options.@"\\" = .raw;
+            encode_options.@"]" = .raw;
+            encode_options.@"^" = .raw;
+            encode_options.@"`" = .raw;
+            encode_options.@"{" = .raw;
+            encode_options.@"|" = .raw;
+            encode_options.@"}" = .raw;
+            // '~' defaults to encoded, so it needs resetting like the rest;
+            // '-', '.' and '_' are already raw by default.
+            encode_options.@"~" = .raw;
+        } else {
+            @compileError("Format string must be empty or begin with 'allow', 'except', or 'only', but found: " ++ fmt);
+        }
+        inline for (final_fmt) |c| switch (c) {
+            '!' => encode_options.@"!" = apply_type,
+            '"' => encode_options.@"\"" = apply_type,
+            '#' => encode_options.@"#" = apply_type,
+            '$' => encode_options.@"$" = apply_type,
+            '%' => encode_options.@"%" = apply_type,
+            '&' => encode_options.@"&" = apply_type,
+            '\'' => encode_options.@"'" = apply_type,
+            '(' => encode_options.@"(" = apply_type,
+            ')' => encode_options.@")" = apply_type,
+            '*' => encode_options.@"*" = apply_type,
+            '+' => encode_options.@"+" = apply_type,
+            ',' => encode_options.@"," = apply_type,
+            '-' => encode_options.@"-" = apply_type,
+            '.' => encode_options.@"." = apply_type,
+            '/' => encode_options.@"/" = apply_type,
+            'c' => encode_options.@":" = apply_type,
+            ';' => encode_options.@";" = apply_type,
+            '<' => encode_options.@"<" = apply_type,
+            '=' => encode_options.@"=" = apply_type,
+            '>' => encode_options.@">" = apply_type,
+            '?' => encode_options.@"?" = apply_type,
+            '@' => encode_options.@"@" = apply_type,
+            '[' => encode_options.@"[" = apply_type,
+            '\\' => encode_options.@"\\" = apply_type,
+            ']' => encode_options.@"]" = apply_type,
+            '^' => encode_options.@"^" = apply_type,
+            '_' => encode_options.@"_" = apply_type,
+            '`' => encode_options.@"`" = apply_type,
+            '{' => encode_options.@"{" = apply_type,
+            '|' => encode_options.@"|" = apply_type,
+            '}' => encode_options.@"}" = apply_type,
+            '~' => encode_options.@"~" = apply_type,
+            // `spaces` has its own enum type, so the tag is mapped explicitly.
+            ' ' => encode_options.spaces = switch (apply_type) {
+                .raw => .raw,
+                .percent_encoded => .percent_encoded,
+            },
+            else => @compileError("invalid percent encoding specifier: " ++ fmt),
+        };
+        if (encode_options.@"+" == .raw and encode_options.spaces == .@"+") {
+            encode_options.spaces = .percent_encoded;
+        }
+    }
+
+    var encoder = encode(raw, encode_options);
+    while (encoder.next()) |chunk| {
+        try writer.writeAll(chunk);
+    }
+}
+
+test fmtEncoded {
+    try test_fmtEncoded("", "", "");
+    try test_fmtEncoded("Hellorld!", "", "Hellorld%21");
+    try test_fmtEncoded(" ", "", "%20");
+    try test_fmtEncoded("_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz", "", "_.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz");
+    try test_fmtEncoded("~", "", "%7E");
+    try test_fmtEncoded("~", "allow~", "~");
+    try test_fmtEncoded("@*", "only*", "@%2A");
+    try test_fmtEncoded("~*", "only*", "~%2A");
+    try test_fmtEncoded("[@*]", "except[]", "[%40%2A]");
+    try test_fmtEncoded("a-b", "except", "a%2Db");
+}
+fn test_fmtEncoded(input: []const u8, comptime fmt: []const u8, expected: []const u8) !void {
+    const temp = try std.fmt.allocPrint(std.testing.allocator, "{" ++ fmt ++ "}", .{ fmtEncoded(input) });
+    defer std.testing.allocator.free(temp);
+    try std.testing.expectEqualStrings(expected, temp);
+}
+
+const std = @import("std");
|
