AcerecA 2025-01-16 20:27:36 +01:00
parent 56883c0bed
commit 9e5d32a420
10 changed files with 718 additions and 28 deletions


@@ -15,6 +15,9 @@
     // Once all dependencies are fetched, `zig build` no longer requires
     // internet connectivity.
     .dependencies = .{
+        .lsfw = .{
+            .path = "lib/lsfw",
+        },
         // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
         //.example = .{
         //    // When updating this field to a new URL, be sure to delete the corresponding


@@ -13,6 +13,7 @@ b_var->a
 ; some struff to do
 )
 "srting"
"wqdwd\"qwesfwf" "wqdwd\"qwesfwf"

src/classifier.zig (new file, 91 lines)

@@ -0,0 +1,91 @@
const tkz = @import("tokenize.zig");
const hlp = @import("helpers.zig");
const std = @import("std");
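/// Coarse syntactic category assigned to each token.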
pub const TokenClass = enum {
symbol,
string,
comment,
docstring,
number,
nil,
t,
list_start,
list_lazy_start,
list_end,
operator,
};
pub const ClassifiedToken = struct {
tok: tkz.Token,
cls: TokenClass,
};
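/// Operator spellings recognized by the classifier (used as a comptime string set; the value type is void).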
const operators = std.ComptimeStringMap(void, .{
.{"->"},
.{"~>"},
.{"/="},
.{"*="},
.{"-="},
.{"+="},
.{"||"},
.{"&&"},
.{"="},
.{"+"},
.{"-"},
.{"*"},
.{"/"},
.{"~"},
.{"%"},
.{"@keys"},
.{"@rest"},
});
const numbers = std.ComptimeStringMap(void, .{
.{"0"},
.{"1"},
.{"2"},
.{"3"},
.{"4"},
.{"5"},
.{"6"},
.{"7"},
.{"8"},
.{"9"},
});
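/// Maps a single token to its class by matching operators, list delimiters, a leading
/// quote or digit, the literals `nil` and `t`, and comment markers; anything else is a symbol.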
fn classify(tok: tkz.Token) ClassifiedToken {
return ClassifiedToken{
.tok = tok,
.cls = if (operators.has(tok.value))
TokenClass.operator
else if (std.mem.eql(u8, "'(", tok.value))
TokenClass.list_lazy_start
else if (std.mem.eql(u8, "(", tok.value))
TokenClass.list_start
else if (std.mem.eql(u8, ")", tok.value))
TokenClass.list_end
else if (std.mem.eql(u8, "\"", tok.value[0..1]))
TokenClass.string
else if (std.mem.eql(u8, "nil", tok.value))
TokenClass.nil
else if (std.mem.eql(u8, "t", tok.value))
TokenClass.t
else if (numbers.has(tok.value[0..1]))
TokenClass.number
else if (std.mem.eql(u8, ";", tok.value[0..1]))
if (tok.value.len >= 3 and std.mem.eql(u8, ";;;", tok.value[0..3])) TokenClass.docstring else TokenClass.comment
else
TokenClass.symbol,
};
}
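/// Classifies every token in `toks`; the caller owns the returned list.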
pub fn classifyTokens(toks: []const tkz.Token, allocator: std.mem.Allocator) !std.ArrayList(ClassifiedToken) {
var ctoks = std.ArrayList(ClassifiedToken).init(allocator);
for (toks) |tok| {
try ctoks.append(classify(tok));
}
return ctoks;
}

src/helpers.zig (new file, 9 lines)

@@ -0,0 +1,9 @@
const std = @import("std");
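/// Returns true if `needle` starts with any of the prefixes in `haystack`.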
pub fn isPartOf(comptime T: type, haystack: [][]const T, needle: []const T) bool {
    for (haystack) |straw| {
        if (needle.len >= straw.len and std.mem.eql(T, straw, needle[0..straw.len])) {
            return true;
        }
    }
    return false;
}


@@ -2,17 +2,74 @@ const std = @import("std");
 const lsp_types = @import("lsfw/src/types.zig");
 const lsp = @import("lsfw/src/lsp.zig");
 const lsp_doc = @import("lsfw/src/document.zig");
+const lsp_log = @import("lsfw/src/logger.zig");
+const tkz = @import("tokenize.zig");
+const cls = @import("classifier.zig");
-const State = struct {};
+const State = struct { symbols: std.ArrayList(cls.ClassifiedToken) };
 const Lsp = lsp.Lsp(State);
+const Scope = enum { hi };
+fn handleHover(allocator: std.mem.Allocator, ctx: *Lsp.Context, pos: lsp_types.Position) ?[]const u8 {
+    if (null == ctx.state) {
+        lsp_log.notify(.info, "could not find token under cursor (at {})", .{pos});
+        return null;
+    } else if (0 == ctx.state.?.symbols.items.len) {
+        handleDocOpen(allocator, ctx);
+    }
+    lsp_log.notify(.err, "{}", .{ctx.state.?.symbols});
+    // for (ctx.state.?.symbols.items) |tok| {
+    //     if (tok.tok.line == pos.line and tok.tok.char <= pos.character and (tok.tok.char + tok.tok.value.len) >= pos.character) {
+    //         lsp_log.notify(.info, "{}", .{tok});
+    //         break;
+    //     }
+    // }
-fn handleHover(allocator: std.mem.Allocator, context: *Lsp.Context, position: lsp_types.Position) ?[]const u8 {
-    _ = allocator;
-    _ = position;
-    _ = context;
     return null;
 }
+fn handleCompletion(allocator: std.mem.Allocator, context: *Lsp.Context, position: lsp_types.Position) ?lsp_types.CompletionList {
+    _ = context;
+    _ = position;
+    var completions = std.ArrayList(lsp_types.CompletionItem).init(allocator);
+    completions.append(.{
+        .label = "(procedure)",
+        .insertText = "(procedure ${1:func_name}($2)\n\n)",
+        .insertTextFormat = .Snippet,
+        .kind = .Function,
+    }) catch return null;
+    return .{ .items = completions.items };
+}
+fn handleDocOpen(allocator: std.mem.Allocator, context: *Lsp.Context) void {
+    lsp_log.notify(.err, "opened doc {s}", .{context.document.uri});
+    const content = context.document.text;
+    const toks = tkz.tokenizeContent(content, allocator) catch unreachable;
+    // const toks = std.ArrayList(tkz.Token).init(allocator);
+    lsp_log.notify(.err, "toks {}", .{toks});
+    // defer toks.deinit();
+    const ctoks = cls.classifyTokens(toks.items, allocator) catch unreachable;
+    lsp_log.notify(.err, "ctoks {}", .{ctoks});
+    // defer ctoks.deinit();
+    // const ast = try stx.generateSyntaxTree(ctoks);
+    lsp_log.notify(.info, "opened {s}, found {d} tokens", .{ context.document.uri, ctoks.items.len });
+    if (context.state != null) {
+        context.state.?.symbols.deinit();
+    }
+    context.state = .{
+        .symbols = ctoks,
+    };
+}
+fn handleDocChanged(allocator: std.mem.Allocator, context: *Lsp.Context, _: []lsp_types.ChangeEvent) void {
+    handleDocOpen(allocator, context);
+}
+fn handleDocClose(_: std.mem.Allocator, _: *Lsp.Context) void {}
 pub fn start() !u8 {
     const descr = lsp_types.ServerData{
         .serverInfo = .{
@@ -21,8 +78,13 @@ pub fn start() !u8 {
         },
     };
-    var server = Lsp.init(std.heap.page_allocator, descr);
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    var server = Lsp.init(gpa.allocator(), descr);
     server.registerHoverCallback(handleHover);
+    server.registerCompletionCallback(handleCompletion);
+    server.registerDocOpenCallback(handleDocOpen);
+    server.registerDocChangeCallback(handleDocChanged);
+    server.registerDocCloseCallback(handleDocClose);
     return server.start();
 }


@@ -1,18 +1,35 @@
 const std = @import("std");
-const tkz = @import("tokenize.zig");
+const tkz = @import("tokenizer.zig");
+// const cls = @import("classifier.zig");
+// const stx = @import("syntax.zig");
+const lsp = @import("lsp.zig");
 pub fn main() !void {
-    var file = try std.fs.cwd().openFile("data/example.il", .{});
+    // var file = try std.fs.cwd().openFile("data/example.il", .{});
-    defer file.close();
+    // defer file.close();
+    //
-    const content = try file.readToEndAlloc(std.heap.page_allocator, 4096 * ((1 << 10) << 10));
+    // const content = try file.readToEndAlloc(std.heap.page_allocator, 4096 * ((1 << 10) << 10));
+    //
-    const toks = try tkz.tokenizeContent(content);
+    // const toks = try tkz.tokenizeContent(content);
-    for (toks.items) |tok| {
+    // // for (toks.items) |tok| {
-        std.debug.print("{}:{} `{s}`\n", .{
+    // //     std.debug.print("{}:{} `{s}`\n", .{
-            tok.line,
+    // //         tok.line,
-            tok.char,
+    // //         tok.char,
-            tok.value,
+    // //         tok.value,
-        });
+    // //     });
-    }
+    // // }
+    //
+    // const ctoks = try cls.classifyTokens(toks);
+    // // for (ctoks.items) |ctok| {
+    // //     std.debug.print("{}:{}\t`{s:<40}`({})\n", .{
+    // //         ctok.tok.line,
+    // //         ctok.tok.char,
+    // //         ctok.tok.value,
+    // //         ctok.cls,
+    // //     });
+    // // }
+    // const ast = try stx.generateSyntaxTree(ctoks);
+    // std.debug.print("{}\n", .{ast});
+    //
+    _ = try lsp.start();
 }

src/parser.zig (new file, 178 lines)

@@ -0,0 +1,178 @@
const std = @import("std");
const toks = @import("tokenizer.zig");
pub const ParseError = error{ no_fn_name, no_fn_params };
pub const Tag = enum {
///expression
///`<rhs...>`
///
///lhs ignored
expr,
///variable assignment
///`<lhs> = <rhs...>`
///
///lhs is overwritten to be variable
var_assign,
///lazy evaluated list
///`'(<rhs...>)`
///
///lhs ignored
llist,
///list (evaluated)
///`(<lhs> <rhs...>)`
///
///lhs needs to be a callable
list_eval,
///fn_def (procedure)
///`;;; <lhs>
///(procedure <main_token>(<lhs>) <rhs...>)`
fn_def,
};
pub const Node = struct {
tag: Tag,
main_token: Index,
data: Data,
pub const Data = struct {
lhs: Index,
rhs: Index,
};
pub const Index = u32;
};
pub const AstError = error{};
pub const Parser = struct {
gpa: std.mem.Allocator,
source: [:0]const u8,
token_tags: []const toks.Token.Tag,
token_locs: []const toks.Token.Loc,
tok_i: Node.Index,
errs: std.ArrayList(AstError),
nodes: std.MultiArrayList(Node),
extra_data: std.ArrayList(Node.Index),
scratch: std.ArrayList(Node.Index),
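    /// Creates a parser over `buffer`, borrowing the token tags and locations
    /// from the tokenizer's MultiArrayList.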
pub fn init(buffer: [:0]const u8, mal: std.MultiArrayList(toks.Token), allocator: std.mem.Allocator) !Parser {
return .{
.gpa = allocator,
.source = buffer,
.token_tags = mal.items(.tag),
.token_locs = mal.items(.loc),
.tok_i = 0,
.errs = std.ArrayList(AstError).init(allocator),
.nodes = std.MultiArrayList(Node){},
.extra_data = std.ArrayList(Node.Index).init(allocator),
.scratch = std.ArrayList(Node.Index).init(allocator),
};
}
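    /// Returns the token at the current position plus `offset` if it has the expected tag.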
fn hasToken(self: *Parser, expected: toks.Token.Tag, offset: isize) ?toks.Token {
if (self.token_tags[@intCast(self.tok_i + offset)] == expected) {
return .{ .loc = self.token_locs[@intCast(self.tok_i + offset)], .tag = self.token_tags[@intCast(self.tok_i + offset)] };
}
return null;
}
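    /// Consumes the current token and returns its index if it matches `expected`.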
fn eatToken(self: *Parser, expected: toks.Token.Tag) ?Node.Index {
const tok = self.hasToken(expected, 0);
if (tok != null) {
self.tok_i += 1;
return self.tok_i - 1;
}
return null;
}
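    /// Parses a `procedure` definition: records the name, then skips over the
    /// parameter list and the body, returning a `fn_def` node.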
fn parse_fn_proc(self: *Parser) ?Node {
_ = self.eatToken(.sym);
if (self.hasToken(.list_l, -2) != null) {
// lisp style
} else if (self.eatToken(.list_l) != null) {
// c style
} else {
// not a procedure call or invalid syntax?
}
const name = self.eatToken(.sym) orelse return null;
std.debug.print("found procedure def for `{s}`", .{self.source[self.token_locs[name].start..self.token_locs[name].end]});
_ = self.eatToken(.list_l) orelse return null;
var open_lists: usize = 0;
        while (self.tok_i < self.token_tags.len) : (self.tok_i += 1) {
switch (self.token_tags[self.tok_i]) {
.list_l, .list_lz => {
open_lists += 1;
},
.list_r => {
if (open_lists > 0) {
open_lists -= 1;
} else {
break;
}
},
else => {},
}
        }
        // step past the ')' that closed the parameter list before skipping the body
        self.tok_i += 1;
        while (self.tok_i < self.token_tags.len) : (self.tok_i += 1) {
switch (self.token_tags[self.tok_i]) {
.list_l, .list_lz => {
open_lists += 1;
},
.list_r => {
if (open_lists > 0) {
open_lists -= 1;
} else {
break;
}
},
else => {},
}
}
self.tok_i += 1;
return Node{ .tag = .fn_def, .main_token = name, .data = .{ .lhs = 0, .rhs = 0 } };
}
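    /// Scans forward and returns the next recognized top-level construct;
    /// currently only `procedure` definitions are handled.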
pub fn next(self: *Parser) ?Node {
while (self.tok_i < self.token_tags.len) : (self.tok_i += 1) {
switch (self.token_tags[self.tok_i]) {
toks.Token.Tag.sym => {
if (std.mem.eql(u8, "procedure", self.source[self.token_locs[self.tok_i].start..self.token_locs[self.tok_i].end])) {
return self.parse_fn_proc();
}
},
else => {},
}
}
return null;
}
};
test "parsing of simple example" {
const example =
\\t
\\nil
\\a = b
\\"some string w/ escaped\""
\\(procedure a() )
;
var tokz = toks.Tokenizer.init(example);
var tokens = std.MultiArrayList(toks.Token){};
defer tokens.deinit(std.testing.allocator);
while (tokz.next()) |tok| {
try tokens.append(std.testing.allocator, tok);
std.debug.print("{}\n", .{tok});
}
var parse = try Parser.init(example, tokens, std.testing.allocator);
while (parse.next()) |ast_node| {
std.debug.print("{}\n", .{ast_node});
}
}

src/syntax.zig (new file, 55 lines)

@@ -0,0 +1,55 @@
const std = @import("std");
const cls = @import("classifier.zig");
pub const SyntaxNode = struct {
ctok: cls.ClassifiedToken,
nodes: ?std.ArrayList(SyntaxNode),
};
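/// Builds a tree of nested list nodes from the classified token stream; comments,
/// docstrings and tokens outside of any list become top-level leaves.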
pub fn generateSyntaxTree(ctoks: std.ArrayList(cls.ClassifiedToken)) !std.ArrayList(SyntaxNode) {
var nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
var actives = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
for (ctoks.items) |ctok| {
switch (ctok.cls) {
cls.TokenClass.comment, cls.TokenClass.docstring => {
try nodes.append(.{
.ctok = ctok,
.nodes = null,
});
},
cls.TokenClass.list_start, cls.TokenClass.list_lazy_start => {
try actives.append(.{
.ctok = ctok,
.nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator),
});
},
            cls.TokenClass.list_end => {
                // close the innermost open list; attach it to its parent if one is
                // still open, otherwise it becomes a top-level node
                if (actives.popOrNull()) |finished| {
                    if (actives.items.len > 0) {
                        try actives.items[actives.items.len - 1].nodes.?.append(finished);
                    } else {
                        try nodes.append(finished);
                    }
                } else {
                    std.debug.print("unmatched list end: {}\n", .{ctok});
                }
            },
            else => {
                // attach the token to the innermost open list without popping it;
                // tokens outside of any list become top-level leaves
                if (actives.items.len > 0) {
                    const active = &actives.items[actives.items.len - 1];
                    if (active.nodes == null) {
                        active.nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
                    }
                    try active.nodes.?.append(.{
                        .ctok = ctok,
                        .nodes = null,
                    });
                } else {
                    try nodes.append(.{
                        .ctok = ctok,
                        .nodes = null,
                    });
                }
            },
}
}
return nodes;
}


@@ -1,6 +1,7 @@
 const std = @import("std");
+const lsp = @import("lsfw/src/lsp.zig");
-const Token = struct {
+pub const Token = struct {
     /// 0-based index of token start in whole file
     start: usize,
     /// 1-based line number the token starts at
@@ -13,9 +14,10 @@ const Token = struct {
 const TokenizationError = error{InvalidKeyword};
-pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
+pub fn tokenizeContent(content: []u8, allocator: std.mem.Allocator) !std.ArrayList(Token) {
-    var toks = std.ArrayList(Token).init(std.heap.page_allocator);
+    var toks = std.ArrayList(Token).init(allocator);
-    var lines = std.ArrayList(usize).init(std.heap.page_allocator);
+    var lines = std.ArrayList(usize).init(allocator);
+    defer lines.deinit();
     var index: usize = 0;
     while (index < content.len) {
@@ -68,7 +70,7 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
             if (std.mem.eql(u8, "@keys", content[index .. index + l])) {} else if (std.mem.eql(u8, "@rest", content[index .. index + l])) {} else {
                 std.debug.print("line={d}, char={d}\n", .{
-                    .line = lines.items.len + 1,
+                    .line = lines.items.len,
                     .char = switch (lines.items.len) {
                         0 => index,
                         else => index - lines.items[lines.items.len - 1],
@@ -83,8 +85,8 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
         };
         try toks.append(.{
             .start = index,
-            .value = content[index .. index + l],
+            .value = try allocator.dupe(u8, content[index .. index + l]),
-            .line = lines.items.len + 1,
+            .line = lines.items.len,
             .char = switch (lines.items.len) {
                 0 => index,
                 else => index - lines.items[lines.items.len - 1],
@@ -92,6 +94,6 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
         });
         index += l;
     }
+    lsp.logger.notify(.err, "done with initial tokenization, generated {d} tokens", .{toks.items.len});
     return toks;
 }

src/tokenizer.zig (new file, 272 lines)

@@ -0,0 +1,272 @@
const std = @import("std");
pub const Token = struct {
tag: Tag,
loc: Loc,
pub const Loc = struct {
start: usize,
end: usize,
};
pub const Tag = enum {
sym,
num,
str,
/// t
t,
/// nil
nil,
/// =
assign,
/// -=
assign_sub,
/// /=
assign_div,
/// *=
assign_mul,
/// +=
assign_add,
/// ==
op_eq,
/// >
op_gt,
/// >=
op_geq,
/// <
op_lt,
/// <=
op_leq,
/// /
op_div,
/// *
op_mul,
/// +
op_add,
/// -
op_sub,
/// ->
op_acc,
/// ~>
op_derefacc,
/// %
op_mod,
/// !
op_not,
/// !=
op_neq,
/// ||
op_or,
/// &&
op_and,
/// (
list_l,
/// '(
list_lz,
/// )
list_r,
/// @keys
kw_keys,
/// @rest
kw_rest,
};
pub fn format(self: *const Token, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
try writer.print("{d}:{d} .{s}", .{ self.loc.start, self.loc.end, @tagName(self.tag) });
}
};
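/// Single-pass, state-machine tokenizer over a sentinel-terminated buffer.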
pub const Tokenizer = struct {
buffer: [:0]const u8,
index: usize,
start: usize,
const State = enum {
start,
alphanum_identifier,
number_or_float,
decimals,
signed_exponent,
unsigned_exponent,
string,
op_plus,
op_minus,
op_star,
op_fslash,
op_pipe,
op_amp,
op_excl,
op_deref,
op_eq,
list_l,
list_lz,
list_r,
};
pub fn init(buf: [:0]const u8) Tokenizer {
return .{
.buffer = buf,
.index = 0,
.start = 0,
};
}
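    /// Returns the next token in the buffer, or null once the input is exhausted.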
pub fn next(self: *Tokenizer) ?Token {
var state: State = .start;
while (self.index < self.buffer.len) : (self.index += 1) {
const c = self.buffer[self.index];
const loc = Token.Loc{ .start = self.start, .end = self.index };
state = switch (state) {
.start => blk: {
self.start = self.index;
break :blk switch (c) {
'a'...'z', 'A'...'Z', '_' => .alphanum_identifier,
'0'...'9' => .number_or_float,
'.' => .decimals,
'"' => .string,
'+' => .op_plus,
'-' => .op_minus,
'*' => .op_star,
'/' => .op_fslash,
'|' => .op_pipe,
'&' => .op_amp,
'!' => .op_excl,
'~' => .op_deref,
'=' => .op_eq,
'(' => .list_l,
')' => .list_r,
'\'' => .list_lz,
else => .start,
};
},
.alphanum_identifier => switch (c) {
'a'...'z', 'A'...'Z', '0'...'9', '_' => .alphanum_identifier,
else => {
inline for (.{ Token.Tag.t, Token.Tag.nil }) |alphanum_tag| {
if (std.mem.eql(u8, self.buffer[self.start..self.index], @tagName(alphanum_tag))) {
return Token{ .tag = alphanum_tag, .loc = loc };
}
}
return Token{ .tag = .sym, .loc = loc };
},
},
.number_or_float => switch (c) {
'0'...'9' => .number_or_float,
'.' => .decimals,
'e' => .signed_exponent,
' ', '\n' => {
return Token{ .tag = .num, .loc = loc };
},
else => unreachable,
},
.decimals => switch (c) {
'0'...'9' => .decimals,
' ', '\n' => {
return Token{ .tag = .num, .loc = loc };
},
else => unreachable,
},
.signed_exponent => switch (c) {
'0'...'9', '+', '-' => .unsigned_exponent,
else => unreachable,
},
.unsigned_exponent => switch (c) {
'0'...'9' => .unsigned_exponent,
' ', '\n' => {
return Token{ .tag = .num, .loc = loc };
},
else => unreachable,
},
.string => switch (c) {
'"' => {
return Token{ .tag = .str, .loc = loc };
},
'\\' => blk: {
self.index += 1;
break :blk .string;
},
else => .string,
},
                .op_plus, .op_minus, .op_fslash, .op_star, .op_excl, .op_eq => switch (c) {
                    '=' => {
                        // consume the second character of the two-character operator
                        self.index += 1;
                        return Token{ .tag = switch (state) {
                            .op_plus => .assign_add,
                            .op_minus => .assign_sub,
                            .op_star => .assign_mul,
                            .op_fslash => .assign_div,
                            .op_excl => .op_neq,
                            .op_eq => .op_eq,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    ' ', '\n' => {
                        return Token{ .tag = switch (state) {
                            .op_plus => .op_add,
                            .op_minus => .op_sub,
                            .op_star => .op_mul,
                            .op_fslash => .op_div,
                            .op_excl => .op_not,
                            .op_eq => .assign,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    '>' => {
                        // consume the '>' of '->'
                        self.index += 1;
                        return Token{ .tag = switch (state) {
                            .op_minus => .op_acc,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    else => unreachable,
                },
                // the second character of '||', '&&' and '~>' is consumed so it does not
                // start a new token on the next call
                .op_pipe => switch (c) {
                    '|' => {
                        self.index += 1;
                        return Token{ .tag = .op_or, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_amp => switch (c) {
                    '&' => {
                        self.index += 1;
                        return Token{ .tag = .op_and, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_deref => switch (c) {
                    '>' => {
                        self.index += 1;
                        return Token{ .tag = .op_derefacc, .loc = loc };
                    },
                    else => unreachable,
                },
.list_l => {
return Token{ .tag = .list_l, .loc = loc };
},
.list_r => {
return Token{ .tag = .list_r, .loc = loc };
},
                .list_lz => switch (c) {
                    '(' => {
                        // the token is the `'(` lazy-list opener: tag it as list_lz (not op_derefacc)
                        // and consume the '(' so it is not emitted again as a plain list_l
                        self.index += 1;
                        return Token{ .tag = .list_lz, .loc = loc };
                    },
                    else => unreachable,
                },
};
}
        // if the buffer ended while a token was still being built, emit it instead of dropping it
        const loc = Token.Loc{ .start = self.start, .end = self.index };
        return switch (state) {
            .alphanum_identifier => blk: {
                inline for (.{ Token.Tag.t, Token.Tag.nil }) |alphanum_tag| {
                    if (std.mem.eql(u8, self.buffer[self.start..self.index], @tagName(alphanum_tag))) {
                        break :blk Token{ .tag = alphanum_tag, .loc = loc };
                    }
                }
                break :blk Token{ .tag = .sym, .loc = loc };
            },
            .number_or_float, .decimals, .unsigned_exponent => Token{ .tag = .num, .loc = loc },
            .list_l => Token{ .tag = .list_l, .loc = loc },
            .list_r => Token{ .tag = .list_r, .loc = loc },
            else => null,
        };
}
};
test "simple tokenization" {
const example =
\\t
\\nil
\\a = b
\\"some string w/ escaped\""
;
var tokz = Tokenizer.init(example);
try std.testing.expectEqual(Token{ .loc = .{ .start = 0, .end = 1 }, .tag = .t }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 2, .end = 5 }, .tag = .nil }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 6, .end = 7 }, .tag = .sym }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 8, .end = 9 }, .tag = .assign }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 10, .end = 11 }, .tag = .sym }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 12, .end = 37 }, .tag = .str }, tokz.next());
}