try zig
This commit is contained in: parent 56883c0bed, commit 9e5d32a420
@@ -15,6 +15,9 @@
    // Once all dependencies are fetched, `zig build` no longer requires
    // internet connectivity.
    .dependencies = .{
        .lsfw = .{
            .path = "lib/lsfw",
        }
        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
        //.example = .{
        //    // When updating this field to a new URL, be sure to delete the corresponding
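If the new lsfw path dependency is also meant to be consumed from build.zig, the wiring would typically look like the sketch below. This is not part of the commit (the Zig sources in this diff import lsfw by relative path), the builder calls vary slightly between Zig versions, and the module name "lsfw" is an assumption.

// build.zig (sketch): resolve the path dependency declared above and expose it as a module.
const lsfw_dep = b.dependency("lsfw", .{ .target = target, .optimize = optimize });
exe.root_module.addImport("lsfw", lsfw_dep.module("lsfw")); // module name assumed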
@@ -13,6 +13,7 @@ b_var->a
; some struff to do
)

"srting"

"wqdwd\"qwesfwf"
@@ -0,0 +1,91 @@
const tkz = @import("tokenize.zig");
const hlp = @import("helpers.zig");
const std = @import("std");

pub const TokenClass = enum {
    symbol,
    string,
    comment,
    docstring,
    number,
    nil,
    t,
    list_start,
    list_lazy_start,
    list_end,
    operator,
};

pub const ClassifiedToken = struct {
    tok: tkz.Token,
    cls: TokenClass,
};

const operators = std.ComptimeStringMap(void, .{
    .{"->"},
    .{"~>"},
    .{"/="},
    .{"*="},
    .{"-="},
    .{"+="},
    .{"||"},
    .{"&&"},
    .{"="},
    .{"+"},
    .{"-"},
    .{"*"},
    .{"/"},
    .{"~"},
    .{"%"},
    .{"@keys"},
    .{"@rest"},
});

const numbers = std.ComptimeStringMap(void, .{
    .{"0"},
    .{"1"},
    .{"2"},
    .{"3"},
    .{"4"},
    .{"5"},
    .{"6"},
    .{"7"},
    .{"8"},
    .{"9"},
});

fn classify(tok: tkz.Token) ClassifiedToken {
    return ClassifiedToken{
        .tok = tok,
        .cls = if (operators.has(tok.value))
            TokenClass.operator
        else if (std.mem.eql(u8, "'(", tok.value))
            TokenClass.list_lazy_start
        else if (std.mem.eql(u8, "(", tok.value))
            TokenClass.list_start
        else if (std.mem.eql(u8, ")", tok.value))
            TokenClass.list_end
        else if (std.mem.eql(u8, "\"", tok.value[0..1]))
            TokenClass.string
        else if (std.mem.eql(u8, "nil", tok.value))
            TokenClass.nil
        else if (std.mem.eql(u8, "t", tok.value))
            TokenClass.t
        else if (numbers.has(tok.value[0..1]))
            TokenClass.number
        else if (std.mem.eql(u8, ";", tok.value[0..1]))
            if (tok.value.len >= 3 and std.mem.eql(u8, ";;;", tok.value[0..3])) TokenClass.docstring else TokenClass.comment
        else
            TokenClass.symbol,
    };
}

pub fn classifyTokens(toks: []const tkz.Token, allocator: std.mem.Allocator) !std.ArrayList(ClassifiedToken) {
    var ctoks = std.ArrayList(ClassifiedToken).init(allocator);

    for (toks) |tok| {
        try ctoks.append(classify(tok));
    }

    return ctoks;
}
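For orientation, a minimal sketch (not part of the commit) of how this classifier is meant to be driven, assuming the tokenizeContent(content, allocator) signature introduced further down in this diff:

const std = @import("std");
const tkz = @import("tokenize.zig");
const cls = @import("classifier.zig");

// Tokenize a buffer, classify every token, and dump each token's text with its class.
pub fn dumpClasses(content: []u8, allocator: std.mem.Allocator) !void {
    const toks = try tkz.tokenizeContent(content, allocator);
    defer toks.deinit();
    const ctoks = try cls.classifyTokens(toks.items, allocator);
    defer ctoks.deinit();
    for (ctoks.items) |ctok| {
        std.debug.print("`{s}` -> {s}\n", .{ ctok.tok.value, @tagName(ctok.cls) });
    }
}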
@@ -0,0 +1,9 @@
const std = @import("std");

/// Returns true if `needle` starts with any of the slices in `haystack`.
pub fn isPartOf(comptime T: type, haystack: [][]const T, needle: []const T) bool {
    for (haystack) |straw| {
        // Guard the slice so a needle shorter than the straw cannot index out of bounds.
        if (needle.len >= straw.len and std.mem.eql(T, straw, needle[0..straw.len])) {
            return true;
        }
    }
    return false;
}
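The helper is generic over T; a small test along these lines (hypothetical, not part of the commit) could sit next to it in the same file:

test "isPartOf finds a matching prefix" {
    var prefixes = [_][]const u8{ "@keys", "@rest" };
    try std.testing.expect(isPartOf(u8, &prefixes, "@keys a b"));
    try std.testing.expect(!isPartOf(u8, &prefixes, "@other x"));
}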
src/lsp.zig (74 lines changed)
@@ -2,17 +2,74 @@ const std = @import("std");
const lsp_types = @import("lsfw/src/types.zig");
const lsp = @import("lsfw/src/lsp.zig");
const lsp_doc = @import("lsfw/src/document.zig");
const lsp_log = @import("lsfw/src/logger.zig");
const tkz = @import("tokenize.zig");
const cls = @import("classifier.zig");

const State = struct {};
const State = struct { symbols: std.ArrayList(cls.ClassifiedToken) };
const Lsp = lsp.Lsp(State);
const Scope = enum { hi };

fn handleHover(allocator: std.mem.Allocator, ctx: *Lsp.Context, pos: lsp_types.Position) ?[]const u8 {
    if (null == ctx.state) {
        lsp_log.notify(.info, "could not find token under cursor (at {})", .{pos});
        return null;
    } else if (0 == ctx.state.?.symbols.items.len) {
        handleDocOpen(allocator, ctx);
    }
    lsp_log.notify(.err, "{}", .{ctx.state.?.symbols});
    // for (ctx.state.?.symbols.items) |tok| {
    //     if (tok.tok.line == pos.line and tok.tok.char <= pos.character and (tok.tok.char + tok.tok.value.len) >= pos.character) {
    //         lsp_log.notify(.info, "{}", .{tok});
    //         break;
    //     }
    // }

fn handleHover(allocator: std.mem.Allocator, context: *Lsp.Context, position: lsp_types.Position) ?[]const u8 {
    _ = allocator;
    _ = position;
    _ = context;
    return null;
}

fn handleCompletion(allocator: std.mem.Allocator, context: *Lsp.Context, position: lsp_types.Position) ?lsp_types.CompletionList {
    _ = context;
    _ = position;
    var completions = std.ArrayList(lsp_types.CompletionItem).init(allocator);
    if (std.mem.Allocator.Error.OutOfMemory == completions.append(.{
        .label = "(procedure)",
        .insertText = "(procedure ${1:func_name}($2)\n\n)",
        .insertTextFormat = .Snippet,
        .kind = .Function,
    })) {
        return null;
    }

    return .{ .items = completions.items };
}

fn handleDocOpen(allocator: std.mem.Allocator, context: *Lsp.Context) void {
    lsp_log.notify(.err, "opened doc {s}", .{context.document.uri});
    const content = context.document.text;
    const toks = tkz.tokenizeContent(content, allocator) catch unreachable;
    // const toks = std.ArrayList(tkz.Token).init(allocator);
    lsp_log.notify(.err, "toks {}", .{toks});
    // defer toks.deinit();
    const ctoks = cls.classifyTokens(toks.items, allocator) catch unreachable;
    lsp_log.notify(.err, "ctoks {}", .{ctoks});
    // defer ctoks.deinit();
    // const ast = try stx.generateSyntaxTree(ctoks);

    lsp_log.notify(.info, "opened {s}, found {d} tokens", .{ context.document.uri, ctoks.items.len });
    if (context.state != null) {
        context.state.?.symbols.deinit();
    }
    context.state = .{
        .symbols = std.ArrayList(cls.ClassifiedToken).init(allocator),
    };
}
fn handleDocChanged(allocator: std.mem.Allocator, context: *Lsp.Context, _: []lsp_types.ChangeEvent) void {
    handleDocOpen(allocator, context);
}

fn handleDocClose(_: std.mem.Allocator, _: *Lsp.Context) void {}

pub fn start() !u8 {
    const descr = lsp_types.ServerData{
        .serverInfo = .{

@@ -21,8 +78,13 @@ pub fn start() !u8 {
        },
    };

    var server = Lsp.init(std.heap.page_allocator, descr);
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    var server = Lsp.init(gpa.allocator(), descr);

    server.registerHoverCallback(handleHover);
    server.registerCompletionCallback(handleCompletion);
    server.registerDocOpenCallback(handleDocOpen);
    server.registerDocChangeCallback(handleDocChanged);
    server.registerDocCloseCallback(handleDocClose);
    return server.start();
}
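Switching from std.heap.page_allocator to a GeneralPurposeAllocator is usually paired with a leak check at shutdown; a minimal sketch of that pattern (not in this diff) would be:

var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit(); // reports leaked allocations in debug builds
var server = Lsp.init(gpa.allocator(), descr);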
src/main.zig (45 lines changed)
@@ -1,18 +1,35 @@
const std = @import("std");
const tkz = @import("tokenize.zig");
const tkz = @import("tokenizer.zig");
// const cls = @import("classifier.zig");
// const stx = @import("syntax.zig");
const lsp = @import("lsp.zig");

pub fn main() !void {
    var file = try std.fs.cwd().openFile("data/example.il", .{});
    defer file.close();

    const content = try file.readToEndAlloc(std.heap.page_allocator, 4096 * ((1 << 10) << 10));

    const toks = try tkz.tokenizeContent(content);
    for (toks.items) |tok| {
        std.debug.print("{}:{} `{s}`\n", .{
            tok.line,
            tok.char,
            tok.value,
        });
    }
    // var file = try std.fs.cwd().openFile("data/example.il", .{});
    // defer file.close();
    //
    // const content = try file.readToEndAlloc(std.heap.page_allocator, 4096 * ((1 << 10) << 10));
    //
    // const toks = try tkz.tokenizeContent(content);
    // // for (toks.items) |tok| {
    // //     std.debug.print("{}:{} `{s}`\n", .{
    // //         tok.line,
    // //         tok.char,
    // //         tok.value,
    // //     });
    // // }
    //
    // const ctoks = try cls.classifyTokens(toks);
    // // for (ctoks.items) |ctok| {
    // //     std.debug.print("{}:{}\t`{s:<40}`({})\n", .{
    // //         ctok.tok.line,
    // //         ctok.tok.char,
    // //         ctok.tok.value,
    // //         ctok.cls,
    // //     });
    // // }
    // const ast = try stx.generateSyntaxTree(ctoks);
    // std.debug.print("{}\n", .{ast});
    //
    _ = try lsp.start();
}
@@ -0,0 +1,178 @@
const std = @import("std");
const toks = @import("tokenizer.zig");

pub const ParseError = error{ no_fn_name, no_fn_params };

pub const Tag = enum {
    ///expression
    ///`<rhs...>`
    ///
    ///lhs ignored
    expr,

    ///variable assignment
    ///`<lhs> = <rhs...>`
    ///
    ///lhs is overwritten to be variable
    var_assign,

    ///lazy evaluated list
    ///`'(<rhs...>)`
    ///
    ///lhs ignored
    llist,

    ///list (evaluated)
    ///`(<lhs> <rhs...>)`
    ///
    ///lhs needs to be a callable
    list_eval,

    ///fn_def (procedure)
    ///`;;; <lhs>
    ///(procedure <main_token>(<lhs>) <rhs...>)`
    fn_def,
};

pub const Node = struct {
    tag: Tag,
    main_token: Index,
    data: Data,

    pub const Data = struct {
        lhs: Index,
        rhs: Index,
    };
    pub const Index = u32;
};
pub const AstError = error{};

pub const Parser = struct {
    gpa: std.mem.Allocator,
    source: [:0]const u8,

    token_tags: []const toks.Token.Tag,
    token_locs: []const toks.Token.Loc,
    tok_i: Node.Index,

    errs: std.ArrayList(AstError),
    nodes: std.MultiArrayList(Node),
    extra_data: std.ArrayList(Node.Index),
    scratch: std.ArrayList(Node.Index),

    pub fn init(buffer: [:0]const u8, mal: std.MultiArrayList(toks.Token), allocator: std.mem.Allocator) !Parser {
        return .{
            .gpa = allocator,
            .source = buffer,

            .token_tags = mal.items(.tag),
            .token_locs = mal.items(.loc),
            .tok_i = 0,

            .errs = std.ArrayList(AstError).init(allocator),
            .nodes = std.MultiArrayList(Node){},
            .extra_data = std.ArrayList(Node.Index).init(allocator),
            .scratch = std.ArrayList(Node.Index).init(allocator),
        };
    }

    fn hasToken(self: *Parser, expected: toks.Token.Tag, offset: isize) ?toks.Token {
        if (self.token_tags[@intCast(self.tok_i + offset)] == expected) {
            return .{ .loc = self.token_locs[@intCast(self.tok_i + offset)], .tag = self.token_tags[@intCast(self.tok_i + offset)] };
        }
        return null;
    }
    fn eatToken(self: *Parser, expected: toks.Token.Tag) ?Node.Index {
        const tok = self.hasToken(expected, 0);
        if (tok != null) {
            self.tok_i += 1;
            return self.tok_i - 1;
        }
        return null;
    }

    fn parse_fn_proc(self: *Parser) ?Node {
        _ = self.eatToken(.sym);
        if (self.hasToken(.list_l, -2) != null) {
            // lisp style
        } else if (self.eatToken(.list_l) != null) {
            // c style
        } else {
            // not a procedure call or invalid syntax?
        }

        const name = self.eatToken(.sym) orelse return null;
        std.debug.print("found procedure def for `{s}`", .{self.source[self.token_locs[name].start..self.token_locs[name].end]});
        _ = self.eatToken(.list_l) orelse return null;
        var open_lists: usize = 0;
        // skip over the parameter list until its matching closing paren
        while (true) : (self.tok_i += 1) {
            switch (self.token_tags[self.tok_i]) {
                .list_l, .list_lz => {
                    open_lists += 1;
                },
                .list_r => {
                    if (open_lists > 0) {
                        open_lists -= 1;
                    } else {
                        break;
                    }
                },
                else => {},
            }
        }

        // skip over the procedure body until the `)` that closes the (procedure ...) form
        while (true) : (self.tok_i += 1) {
            switch (self.token_tags[self.tok_i]) {
                .list_l, .list_lz => {
                    open_lists += 1;
                },
                .list_r => {
                    if (open_lists > 0) {
                        open_lists -= 1;
                    } else {
                        break;
                    }
                },
                else => {},
            }
        }
        self.tok_i += 1;

        return Node{ .tag = .fn_def, .main_token = name, .data = .{ .lhs = 0, .rhs = 0 } };
    }

    pub fn next(self: *Parser) ?Node {
        while (self.tok_i < self.token_tags.len) : (self.tok_i += 1) {
            switch (self.token_tags[self.tok_i]) {
                toks.Token.Tag.sym => {
                    if (std.mem.eql(u8, "procedure", self.source[self.token_locs[self.tok_i].start..self.token_locs[self.tok_i].end])) {
                        return self.parse_fn_proc();
                    }
                },
                else => {},
            }
        }
        return null;
    }
};
test "parsing of simple example" {
    const example =
        \\t
        \\nil
        \\a = b
        \\"some string w/ escaped\""
        \\(procedure a() )
    ;

    var tokz = toks.Tokenizer.init(example);
    var tokens = std.MultiArrayList(toks.Token){};
    defer tokens.deinit(std.testing.allocator);
    while (tokz.next()) |tok| {
        try tokens.append(std.testing.allocator, tok);
        std.debug.print("{}\n", .{tok});
    }
    var parse = try Parser.init(example, tokens, std.testing.allocator);
    while (parse.next()) |ast_node| {
        std.debug.print("{}\n", .{ast_node});
    }
}
@@ -0,0 +1,55 @@
const std = @import("std");
const cls = @import("classifier.zig");

pub const SyntaxNode = struct {
    ctok: cls.ClassifiedToken,
    nodes: ?std.ArrayList(SyntaxNode),
};

pub fn generateSyntaxTree(ctoks: std.ArrayList(cls.ClassifiedToken)) !std.ArrayList(SyntaxNode) {
    var nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
    var actives = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);

    for (ctoks.items) |ctok| {
        switch (ctok.cls) {
            cls.TokenClass.comment, cls.TokenClass.docstring => {
                try nodes.append(.{
                    .ctok = ctok,
                    .nodes = null,
                });
            },
            cls.TokenClass.list_start, cls.TokenClass.list_lazy_start => {
                try actives.append(.{
                    .ctok = ctok,
                    .nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator),
                });
            },
            cls.TokenClass.list_end => {
                if (actives.items.len > 0) {
                    try nodes.append(actives.pop());
                } else {
                    std.debug.print("{}\n", .{actives});
                }
            },
            else => {
                const active_top = actives.popOrNull();
                if (active_top != null) {
                    var active = active_top.?;
                    var actives_nodes: std.ArrayList(SyntaxNode) = undefined;
                    if (active.nodes != null) {
                        actives_nodes = active.nodes.?;
                    } else {
                        active.nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
                        actives_nodes = active.nodes.?;
                    }
                    try actives_nodes.append(.{
                        .ctok = ctok,
                        .nodes = null,
                    });
                } else {}
            },
        }
    }

    return nodes;
}
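A rough sketch of how generateSyntaxTree fits the rest of the pipeline, following the commented-out flow in src/main.zig (tokenize, classify, then build the tree); the helper name dumpTree is hypothetical and not part of the commit:

const std = @import("std");
const tkz = @import("tokenize.zig");
const cls = @import("classifier.zig");
const stx = @import("syntax.zig");

// Run the full pipeline over a buffer and print the top-level syntax nodes.
fn dumpTree(content: []u8, allocator: std.mem.Allocator) !void {
    const toks = try tkz.tokenizeContent(content, allocator);
    const ctoks = try cls.classifyTokens(toks.items, allocator);
    const tree = try stx.generateSyntaxTree(ctoks);
    for (tree.items) |node| {
        std.debug.print("{}\n", .{node});
    }
}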
@@ -1,6 +1,7 @@
const std = @import("std");
const lsp = @import("lsfw/src/lsp.zig");

const Token = struct {
pub const Token = struct {
    /// 0-based index of token start in whole file
    start: usize,
    /// 1-based line number the token starts at

@@ -13,9 +14,10 @@ const Token = struct {

const TokenizationError = error{InvalidKeyword};

pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
    var toks = std.ArrayList(Token).init(std.heap.page_allocator);
    var lines = std.ArrayList(usize).init(std.heap.page_allocator);
pub fn tokenizeContent(content: []u8, allocator: std.mem.Allocator) !std.ArrayList(Token) {
    var toks = std.ArrayList(Token).init(allocator);
    var lines = std.ArrayList(usize).init(allocator);
    defer lines.deinit();

    var index: usize = 0;
    while (index < content.len) {

@@ -68,7 +70,7 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {

        if (std.mem.eql(u8, "@keys", content[index .. index + l])) {} else if (std.mem.eql(u8, "@rest", content[index .. index + l])) {} else {
            std.debug.print("line={d}, char={d}\n", .{
                .line = lines.items.len + 1,
                .line = lines.items.len,
                .char = switch (lines.items.len) {
                    0 => index,
                    else => index - lines.items[lines.items.len - 1],

@@ -83,8 +85,8 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
        };
        try toks.append(.{
            .start = index,
            .value = content[index .. index + l],
            .line = lines.items.len + 1,
            .value = try allocator.dupe(u8, content[index .. index + l]),
            .line = lines.items.len,
            .char = switch (lines.items.len) {
                0 => index,
                else => index - lines.items[lines.items.len - 1],

@@ -92,6 +94,6 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
        });
        index += l;
    }

    lsp.logger.notify(.err, "done with initial tokenization, generated {d} tokens", .{toks.items.len});
    return toks;
}
@@ -0,0 +1,272 @@
const std = @import("std");

pub const Token = struct {
    tag: Tag,
    loc: Loc,

    pub const Loc = struct {
        start: usize,
        end: usize,
    };

    pub const Tag = enum {
        sym,
        num,
        str,
        /// t
        t,
        /// nil
        nil,
        /// =
        assign,
        /// -=
        assign_sub,
        /// /=
        assign_div,
        /// *=
        assign_mul,
        /// +=
        assign_add,
        /// ==
        op_eq,
        /// >
        op_gt,
        /// >=
        op_geq,
        /// <
        op_lt,
        /// <=
        op_leq,
        /// /
        op_div,
        /// *
        op_mul,
        /// +
        op_add,
        /// -
        op_sub,
        /// ->
        op_acc,
        /// ~>
        op_derefacc,
        /// %
        op_mod,
        /// !
        op_not,
        /// !=
        op_neq,
        /// ||
        op_or,
        /// &&
        op_and,
        /// (
        list_l,
        /// '(
        list_lz,
        /// )
        list_r,
        /// @keys
        kw_keys,
        /// @rest
        kw_rest,
    };

    pub fn format(self: *const Token, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
        try writer.print("{d}:{d} .{s}", .{ self.loc.start, self.loc.end, @tagName(self.tag) });
    }
};

pub const Tokenizer = struct {
    buffer: [:0]const u8,
    index: usize,
    start: usize,

    const State = enum {
        start,
        alphanum_identifier,
        number_or_float,
        decimals,
        signed_exponent,
        unsigned_exponent,
        string,
        op_plus,
        op_minus,
        op_star,
        op_fslash,
        op_pipe,
        op_amp,
        op_excl,
        op_deref,
        op_eq,
        list_l,
        list_lz,
        list_r,
    };

    pub fn init(buf: [:0]const u8) Tokenizer {
        return .{
            .buffer = buf,
            .index = 0,
            .start = 0,
        };
    }

    pub fn next(self: *Tokenizer) ?Token {
        var state: State = .start;
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            const loc = Token.Loc{ .start = self.start, .end = self.index };
            state = switch (state) {
                .start => blk: {
                    self.start = self.index;
                    break :blk switch (c) {
                        'a'...'z', 'A'...'Z', '_' => .alphanum_identifier,
                        '0'...'9' => .number_or_float,
                        '.' => .decimals,
                        '"' => .string,
                        '+' => .op_plus,
                        '-' => .op_minus,
                        '*' => .op_star,
                        '/' => .op_fslash,
                        '|' => .op_pipe,
                        '&' => .op_amp,
                        '!' => .op_excl,
                        '~' => .op_deref,
                        '=' => .op_eq,
                        '(' => .list_l,
                        ')' => .list_r,
                        '\'' => .list_lz,
                        else => .start,
                    };
                },
                .alphanum_identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '0'...'9', '_' => .alphanum_identifier,
                    else => {
                        inline for (.{ Token.Tag.t, Token.Tag.nil }) |alphanum_tag| {
                            if (std.mem.eql(u8, self.buffer[self.start..self.index], @tagName(alphanum_tag))) {
                                return Token{ .tag = alphanum_tag, .loc = loc };
                            }
                        }
                        return Token{ .tag = .sym, .loc = loc };
                    },
                },
                .number_or_float => switch (c) {
                    '0'...'9' => .number_or_float,
                    '.' => .decimals,
                    'e' => .signed_exponent,
                    ' ', '\n' => {
                        return Token{ .tag = .num, .loc = loc };
                    },
                    else => unreachable,
                },
                .decimals => switch (c) {
                    '0'...'9' => .decimals,
                    ' ', '\n' => {
                        return Token{ .tag = .num, .loc = loc };
                    },
                    else => unreachable,
                },
                .signed_exponent => switch (c) {
                    '0'...'9', '+', '-' => .unsigned_exponent,
                    else => unreachable,
                },
                .unsigned_exponent => switch (c) {
                    '0'...'9' => .unsigned_exponent,
                    ' ', '\n' => {
                        return Token{ .tag = .num, .loc = loc };
                    },
                    else => unreachable,
                },
                .string => switch (c) {
                    '"' => {
                        return Token{ .tag = .str, .loc = loc };
                    },
                    '\\' => blk: {
                        self.index += 1;
                        break :blk .string;
                    },
                    else => .string,
                },
                .op_plus, .op_minus, .op_fslash, .op_star, .op_excl, .op_eq => switch (c) {
                    '=' => {
                        return Token{ .tag = switch (state) {
                            .op_plus => .assign_add,
                            .op_minus => .assign_sub,
                            .op_star => .assign_mul,
                            .op_fslash => .assign_div,
                            .op_excl => .op_neq,
                            .op_eq => .op_eq,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    ' ', '\n' => {
                        return Token{ .tag = switch (state) {
                            .op_plus => .op_add,
                            .op_minus => .op_sub,
                            .op_star => .op_mul,
                            .op_fslash => .op_div,
                            .op_excl => .op_not,
                            .op_eq => .assign,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    '>' => {
                        return Token{ .tag = switch (state) {
                            .op_minus => .op_acc,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_pipe => switch (c) {
                    '|' => {
                        return Token{ .tag = .op_or, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_amp => switch (c) {
                    '&' => {
                        return Token{ .tag = .op_and, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_deref => switch (c) {
                    '>' => {
                        return Token{ .tag = .op_derefacc, .loc = loc };
                    },
                    else => unreachable,
                },
                .list_l => {
                    return Token{ .tag = .list_l, .loc = loc };
                },
                .list_r => {
                    return Token{ .tag = .list_r, .loc = loc };
                },
                .list_lz => switch (c) {
                    '(' => {
                        return Token{ .tag = .list_lz, .loc = loc };
                    },
                    else => unreachable,
                },
            };
        }
        return null;
    }
};

test "simple tokenization" {
    const example =
        \\t
        \\nil
        \\a = b
        \\"some string w/ escaped\""
    ;

    var tokz = Tokenizer.init(example);
    try std.testing.expectEqual(Token{ .loc = .{ .start = 0, .end = 1 }, .tag = .t }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 2, .end = 5 }, .tag = .nil }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 6, .end = 7 }, .tag = .sym }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 8, .end = 9 }, .tag = .assign }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 10, .end = 11 }, .tag = .sym }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 12, .end = 37 }, .tag = .str }, tokz.next());
}