try zig
This commit is contained in: parent 56883c0bed, commit 9e5d32a420
@@ -15,6 +15,9 @@
    // Once all dependencies are fetched, `zig build` no longer requires
    // internet connectivity.
    .dependencies = .{
        .lsfw = .{
            .path = "lib/lsfw",
        }
        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
        //.example = .{
        //    // When updating this field to a new URL, be sure to delete the corresponding
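If the new lsfw path dependency is also meant to be consumed from build.zig, the wiring would typically look like the sketch below. This is not part of the commit (the Zig sources in this diff import lsfw by relative path), the builder calls vary slightly between Zig versions, and the module name "lsfw" is an assumption.

// build.zig (sketch): resolve the path dependency declared above and expose it as a module.
const lsfw_dep = b.dependency("lsfw", .{ .target = target, .optimize = optimize });
exe.root_module.addImport("lsfw", lsfw_dep.module("lsfw")); // module name assumed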
@@ -13,6 +13,7 @@ b_var->a
; some struff to do
)

"srting"

"wqdwd\"qwesfwf"
@@ -0,0 +1,91 @@
const tkz = @import("tokenize.zig");
const hlp = @import("helpers.zig");
const std = @import("std");

pub const TokenClass = enum {
    symbol,
    string,
    comment,
    docstring,
    number,
    nil,
    t,
    list_start,
    list_lazy_start,
    list_end,
    operator,
};

pub const ClassifiedToken = struct {
    tok: tkz.Token,
    cls: TokenClass,
};

const operators = std.ComptimeStringMap(void, .{
    .{"->"},
    .{"~>"},
    .{"/="},
    .{"*="},
    .{"-="},
    .{"+="},
    .{"||"},
    .{"&&"},
    .{"="},
    .{"+"},
    .{"-"},
    .{"*"},
    .{"/"},
    .{"~"},
    .{"%"},
    .{"@keys"},
    .{"@rest"},
});

const numbers = std.ComptimeStringMap(void, .{
    .{"0"},
    .{"1"},
    .{"2"},
    .{"3"},
    .{"4"},
    .{"5"},
    .{"6"},
    .{"7"},
    .{"8"},
    .{"9"},
});

fn classify(tok: tkz.Token) ClassifiedToken {
    return ClassifiedToken{
        .tok = tok,
        .cls = if (operators.has(tok.value))
            TokenClass.operator
        else if (std.mem.eql(u8, "'(", tok.value))
            TokenClass.list_lazy_start
        else if (std.mem.eql(u8, "(", tok.value))
            TokenClass.list_start
        else if (std.mem.eql(u8, ")", tok.value))
            TokenClass.list_end
        else if (std.mem.eql(u8, "\"", tok.value[0..1]))
            TokenClass.string
        else if (std.mem.eql(u8, "nil", tok.value))
            TokenClass.nil
        else if (std.mem.eql(u8, "t", tok.value))
            TokenClass.t
        else if (numbers.has(tok.value[0..1]))
            TokenClass.number
        else if (std.mem.eql(u8, ";", tok.value[0..1]))
            if (tok.value.len >= 3 and std.mem.eql(u8, ";;;", tok.value[0..3])) TokenClass.docstring else TokenClass.comment
        else
            TokenClass.symbol,
    };
}

pub fn classifyTokens(toks: []const tkz.Token, allocator: std.mem.Allocator) !std.ArrayList(ClassifiedToken) {
    var ctoks = std.ArrayList(ClassifiedToken).init(allocator);

    for (toks) |tok| {
        try ctoks.append(classify(tok));
    }

    return ctoks;
}
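For orientation, a minimal sketch (not part of the commit) of how this classifier is meant to be driven, assuming the tokenizeContent(content, allocator) signature introduced further down in this diff:

const std = @import("std");
const tkz = @import("tokenize.zig");
const cls = @import("classifier.zig");

// Tokenize a buffer, classify every token, and dump each token's text with its class.
pub fn dumpClasses(content: []u8, allocator: std.mem.Allocator) !void {
    const toks = try tkz.tokenizeContent(content, allocator);
    defer toks.deinit();
    const ctoks = try cls.classifyTokens(toks.items, allocator);
    defer ctoks.deinit();
    for (ctoks.items) |ctok| {
        std.debug.print("`{s}` -> {s}\n", .{ ctok.tok.value, @tagName(ctok.cls) });
    }
}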
@@ -0,0 +1,9 @@
const std = @import("std");

/// Returns true if `needle` starts with any of the slices in `haystack`.
pub fn isPartOf(comptime T: type, haystack: [][]const T, needle: []const T) bool {
    for (haystack) |straw| {
        // Guard the slice so a needle shorter than the straw cannot index out of bounds.
        if (needle.len >= straw.len and std.mem.eql(T, straw, needle[0..straw.len])) {
            return true;
        }
    }
    return false;
}
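The helper is generic over T; a small test along these lines (hypothetical, not part of the commit) could sit next to it in the same file:

test "isPartOf finds a matching prefix" {
    var prefixes = [_][]const u8{ "@keys", "@rest" };
    try std.testing.expect(isPartOf(u8, &prefixes, "@keys a b"));
    try std.testing.expect(!isPartOf(u8, &prefixes, "@other x"));
}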
src/lsp.zig (74 lines changed)
@@ -2,17 +2,74 @@ const std = @import("std");
const lsp_types = @import("lsfw/src/types.zig");
const lsp = @import("lsfw/src/lsp.zig");
const lsp_doc = @import("lsfw/src/document.zig");
const lsp_log = @import("lsfw/src/logger.zig");
const tkz = @import("tokenize.zig");
const cls = @import("classifier.zig");

const State = struct {};
const State = struct { symbols: std.ArrayList(cls.ClassifiedToken) };
const Lsp = lsp.Lsp(State);
const Scope = enum { hi };

fn handleHover(allocator: std.mem.Allocator, ctx: *Lsp.Context, pos: lsp_types.Position) ?[]const u8 {
    if (null == ctx.state) {
        lsp_log.notify(.info, "could not find token under cursor (at {})", .{pos});
        return null;
    } else if (0 == ctx.state.?.symbols.items.len) {
        handleDocOpen(allocator, ctx);
    }
    lsp_log.notify(.err, "{}", .{ctx.state.?.symbols});
    // for (ctx.state.?.symbols.items) |tok| {
    //     if (tok.tok.line == pos.line and tok.tok.char <= pos.character and (tok.tok.char + tok.tok.value.len) >= pos.character) {
    //         lsp_log.notify(.info, "{}", .{tok});
    //         break;
    //     }
    // }

fn handleHover(allocator: std.mem.Allocator, context: *Lsp.Context, position: lsp_types.Position) ?[]const u8 {
    _ = allocator;
    _ = position;
    _ = context;
    return null;
}

fn handleCompletion(allocator: std.mem.Allocator, context: *Lsp.Context, position: lsp_types.Position) ?lsp_types.CompletionList {
    _ = context;
    _ = position;
    var completions = std.ArrayList(lsp_types.CompletionItem).init(allocator);
    if (std.mem.Allocator.Error.OutOfMemory == completions.append(.{
        .label = "(procedure)",
        .insertText = "(procedure ${1:func_name}($2)\n\n)",
        .insertTextFormat = .Snippet,
        .kind = .Function,
    })) {
        return null;
    }

    return .{ .items = completions.items };
}

fn handleDocOpen(allocator: std.mem.Allocator, context: *Lsp.Context) void {
    lsp_log.notify(.err, "opened doc {s}", .{context.document.uri});
    const content = context.document.text;
    const toks = tkz.tokenizeContent(content, allocator) catch unreachable;
    // const toks = std.ArrayList(tkz.Token).init(allocator);
    lsp_log.notify(.err, "toks {}", .{toks});
    // defer toks.deinit();
    const ctoks = cls.classifyTokens(toks.items, allocator) catch unreachable;
    lsp_log.notify(.err, "ctoks {}", .{ctoks});
    // defer ctoks.deinit();
    // const ast = try stx.generateSyntaxTree(ctoks);

    lsp_log.notify(.info, "opened {s}, found {d} tokens", .{ context.document.uri, ctoks.items.len });
    if (context.state != null) {
        context.state.?.symbols.deinit();
    }
    context.state = .{
        .symbols = std.ArrayList(cls.ClassifiedToken).init(allocator),
    };
}
fn handleDocChanged(allocator: std.mem.Allocator, context: *Lsp.Context, _: []lsp_types.ChangeEvent) void {
    handleDocOpen(allocator, context);
}

fn handleDocClose(_: std.mem.Allocator, _: *Lsp.Context) void {}

pub fn start() !u8 {
    const descr = lsp_types.ServerData{
        .serverInfo = .{

@@ -21,8 +78,13 @@ pub fn start() !u8 {
        },
    };

    var server = Lsp.init(std.heap.page_allocator, descr);
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    var server = Lsp.init(gpa.allocator(), descr);

    server.registerHoverCallback(handleHover);
    server.registerCompletionCallback(handleCompletion);
    server.registerDocOpenCallback(handleDocOpen);
    server.registerDocChangeCallback(handleDocChanged);
    server.registerDocCloseCallback(handleDocClose);
    return server.start();
}
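Switching from std.heap.page_allocator to a GeneralPurposeAllocator is usually paired with a leak check at shutdown; a minimal sketch of that pattern (not in this diff) would be:

var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit(); // reports leaked allocations in debug builds
var server = Lsp.init(gpa.allocator(), descr);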
src/main.zig (45 lines changed)
@@ -1,18 +1,35 @@
const std = @import("std");
const tkz = @import("tokenize.zig");
const tkz = @import("tokenizer.zig");
// const cls = @import("classifier.zig");
// const stx = @import("syntax.zig");
const lsp = @import("lsp.zig");

pub fn main() !void {
    var file = try std.fs.cwd().openFile("data/example.il", .{});
    defer file.close();

    const content = try file.readToEndAlloc(std.heap.page_allocator, 4096 * ((1 << 10) << 10));

    const toks = try tkz.tokenizeContent(content);
    for (toks.items) |tok| {
        std.debug.print("{}:{} `{s}`\n", .{
            tok.line,
            tok.char,
            tok.value,
        });
    }
    // var file = try std.fs.cwd().openFile("data/example.il", .{});
    // defer file.close();
    //
    // const content = try file.readToEndAlloc(std.heap.page_allocator, 4096 * ((1 << 10) << 10));
    //
    // const toks = try tkz.tokenizeContent(content);
    // // for (toks.items) |tok| {
    // //     std.debug.print("{}:{} `{s}`\n", .{
    // //         tok.line,
    // //         tok.char,
    // //         tok.value,
    // //     });
    // // }
    //
    // const ctoks = try cls.classifyTokens(toks);
    // // for (ctoks.items) |ctok| {
    // //     std.debug.print("{}:{}\t`{s:<40}`({})\n", .{
    // //         ctok.tok.line,
    // //         ctok.tok.char,
    // //         ctok.tok.value,
    // //         ctok.cls,
    // //     });
    // // }
    // const ast = try stx.generateSyntaxTree(ctoks);
    // std.debug.print("{}\n", .{ast});
    //
    _ = try lsp.start();
}
@@ -0,0 +1,178 @@
const std = @import("std");
const toks = @import("tokenizer.zig");

pub const ParseError = error{ no_fn_name, no_fn_params };

pub const Tag = enum {
    ///expression
    ///`<rhs...>`
    ///
    ///lhs ignored
    expr,

    ///variable assignment
    ///`<lhs> = <rhs...>`
    ///
    ///lhs is overwritten to be variable
    var_assign,

    ///lazy evaluated list
    ///`'(<rhs...>)`
    ///
    ///lhs ignored
    llist,

    ///list (evaluated)
    ///`(<lhs> <rhs...>)`
    ///
    ///lhs needs to be a callable
    list_eval,

    ///fn_def (procedure)
    ///`;;; <lhs>
    ///(procedure <main_token>(<lhs>) <rhs...>)`
    fn_def,
};

pub const Node = struct {
    tag: Tag,
    main_token: Index,
    data: Data,

    pub const Data = struct {
        lhs: Index,
        rhs: Index,
    };
    pub const Index = u32;
};
pub const AstError = error{};

pub const Parser = struct {
    gpa: std.mem.Allocator,
    source: [:0]const u8,

    token_tags: []const toks.Token.Tag,
    token_locs: []const toks.Token.Loc,
    tok_i: Node.Index,

    errs: std.ArrayList(AstError),
    nodes: std.MultiArrayList(Node),
    extra_data: std.ArrayList(Node.Index),
    scratch: std.ArrayList(Node.Index),

    pub fn init(buffer: [:0]const u8, mal: std.MultiArrayList(toks.Token), allocator: std.mem.Allocator) !Parser {
        return .{
            .gpa = allocator,
            .source = buffer,

            .token_tags = mal.items(.tag),
            .token_locs = mal.items(.loc),
            .tok_i = 0,

            .errs = std.ArrayList(AstError).init(allocator),
            .nodes = std.MultiArrayList(Node){},
            .extra_data = std.ArrayList(Node.Index).init(allocator),
            .scratch = std.ArrayList(Node.Index).init(allocator),
        };
    }

    fn hasToken(self: *Parser, expected: toks.Token.Tag, offset: isize) ?toks.Token {
        if (self.token_tags[@intCast(self.tok_i + offset)] == expected) {
            return .{ .loc = self.token_locs[@intCast(self.tok_i + offset)], .tag = self.token_tags[@intCast(self.tok_i + offset)] };
        }
        return null;
    }
    fn eatToken(self: *Parser, expected: toks.Token.Tag) ?Node.Index {
        const tok = self.hasToken(expected, 0);
        if (tok != null) {
            self.tok_i += 1;
            return self.tok_i - 1;
        }
        return null;
    }

    fn parse_fn_proc(self: *Parser) ?Node {
        _ = self.eatToken(.sym);
        if (self.hasToken(.list_l, -2) != null) {
            // lisp style
        } else if (self.eatToken(.list_l) != null) {
            // c style
        } else {
            // not a procedure call or invalid syntax?
        }

        const name = self.eatToken(.sym) orelse return null;
        std.debug.print("found procedure def for `{s}`", .{self.source[self.token_locs[name].start..self.token_locs[name].end]});
        _ = self.eatToken(.list_l) orelse return null;
        var open_lists: usize = 0;
        // skip over the parameter list until its matching closing paren
        while (true) : (self.tok_i += 1) {
            switch (self.token_tags[self.tok_i]) {
                .list_l, .list_lz => {
                    open_lists += 1;
                },
                .list_r => {
                    if (open_lists > 0) {
                        open_lists -= 1;
                    } else {
                        break;
                    }
                },
                else => {},
            }
        }

        // skip over the procedure body until the `)` that closes the (procedure ...) form
        while (true) : (self.tok_i += 1) {
            switch (self.token_tags[self.tok_i]) {
                .list_l, .list_lz => {
                    open_lists += 1;
                },
                .list_r => {
                    if (open_lists > 0) {
                        open_lists -= 1;
                    } else {
                        break;
                    }
                },
                else => {},
            }
        }
        self.tok_i += 1;

        return Node{ .tag = .fn_def, .main_token = name, .data = .{ .lhs = 0, .rhs = 0 } };
    }

    pub fn next(self: *Parser) ?Node {
        while (self.tok_i < self.token_tags.len) : (self.tok_i += 1) {
            switch (self.token_tags[self.tok_i]) {
                toks.Token.Tag.sym => {
                    if (std.mem.eql(u8, "procedure", self.source[self.token_locs[self.tok_i].start..self.token_locs[self.tok_i].end])) {
                        return self.parse_fn_proc();
                    }
                },
                else => {},
            }
        }
        return null;
    }
};
test "parsing of simple example" {
    const example =
        \\t
        \\nil
        \\a = b
        \\"some string w/ escaped\""
        \\(procedure a() )
    ;

    var tokz = toks.Tokenizer.init(example);
    var tokens = std.MultiArrayList(toks.Token){};
    defer tokens.deinit(std.testing.allocator);
    while (tokz.next()) |tok| {
        try tokens.append(std.testing.allocator, tok);
        std.debug.print("{}\n", .{tok});
    }
    var parse = try Parser.init(example, tokens, std.testing.allocator);
    while (parse.next()) |ast_node| {
        std.debug.print("{}\n", .{ast_node});
    }
}
@@ -0,0 +1,55 @@
const std = @import("std");
const cls = @import("classifier.zig");

pub const SyntaxNode = struct {
    ctok: cls.ClassifiedToken,
    nodes: ?std.ArrayList(SyntaxNode),
};

pub fn generateSyntaxTree(ctoks: std.ArrayList(cls.ClassifiedToken)) !std.ArrayList(SyntaxNode) {
    var nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
    var actives = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);

    for (ctoks.items) |ctok| {
        switch (ctok.cls) {
            cls.TokenClass.comment, cls.TokenClass.docstring => {
                try nodes.append(.{
                    .ctok = ctok,
                    .nodes = null,
                });
            },
            cls.TokenClass.list_start, cls.TokenClass.list_lazy_start => {
                try actives.append(.{
                    .ctok = ctok,
                    .nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator),
                });
            },
            cls.TokenClass.list_end => {
                if (actives.items.len > 0) {
                    try nodes.append(actives.pop());
                } else {
                    std.debug.print("{}\n", .{actives});
                }
            },
            else => {
                const active_top = actives.popOrNull();
                if (active_top != null) {
                    var active = active_top.?;
                    var actives_nodes: std.ArrayList(SyntaxNode) = undefined;
                    if (active.nodes != null) {
                        actives_nodes = active.nodes.?;
                    } else {
                        active.nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
                        actives_nodes = active.nodes.?;
                    }
                    try actives_nodes.append(.{
                        .ctok = ctok,
                        .nodes = null,
                    });
                } else {}
            },
        }
    }

    return nodes;
}
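A rough sketch of how generateSyntaxTree fits the rest of the pipeline, following the commented-out flow in src/main.zig (tokenize, classify, then build the tree); the helper name dumpTree is hypothetical and not part of the commit:

const std = @import("std");
const tkz = @import("tokenize.zig");
const cls = @import("classifier.zig");
const stx = @import("syntax.zig");

// Run the full pipeline over a buffer and print the top-level syntax nodes.
fn dumpTree(content: []u8, allocator: std.mem.Allocator) !void {
    const toks = try tkz.tokenizeContent(content, allocator);
    const ctoks = try cls.classifyTokens(toks.items, allocator);
    const tree = try stx.generateSyntaxTree(ctoks);
    for (tree.items) |node| {
        std.debug.print("{}\n", .{node});
    }
}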
@@ -1,6 +1,7 @@
const std = @import("std");
const lsp = @import("lsfw/src/lsp.zig");

const Token = struct {
pub const Token = struct {
    /// 0-based index of token start in whole file
    start: usize,
    /// 1-based line number the token starts at

@@ -13,9 +14,10 @@ const Token = struct {

const TokenizationError = error{InvalidKeyword};

pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
    var toks = std.ArrayList(Token).init(std.heap.page_allocator);
    var lines = std.ArrayList(usize).init(std.heap.page_allocator);
pub fn tokenizeContent(content: []u8, allocator: std.mem.Allocator) !std.ArrayList(Token) {
    var toks = std.ArrayList(Token).init(allocator);
    var lines = std.ArrayList(usize).init(allocator);
    defer lines.deinit();

    var index: usize = 0;
    while (index < content.len) {

@@ -68,7 +70,7 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {

        if (std.mem.eql(u8, "@keys", content[index .. index + l])) {} else if (std.mem.eql(u8, "@rest", content[index .. index + l])) {} else {
            std.debug.print("line={d}, char={d}\n", .{
                .line = lines.items.len + 1,
                .line = lines.items.len,
                .char = switch (lines.items.len) {
                    0 => index,
                    else => index - lines.items[lines.items.len - 1],

@@ -83,8 +85,8 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
        };
        try toks.append(.{
            .start = index,
            .value = content[index .. index + l],
            .line = lines.items.len + 1,
            .value = try allocator.dupe(u8, content[index .. index + l]),
            .line = lines.items.len,
            .char = switch (lines.items.len) {
                0 => index,
                else => index - lines.items[lines.items.len - 1],

@@ -92,6 +94,6 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
        });
        index += l;
    }

    lsp.logger.notify(.err, "done with initial tokenization, generated {d} tokens", .{toks.items.len});
    return toks;
}
@@ -0,0 +1,272 @@
const std = @import("std");

pub const Token = struct {
    tag: Tag,
    loc: Loc,

    pub const Loc = struct {
        start: usize,
        end: usize,
    };

    pub const Tag = enum {
        sym,
        num,
        str,
        /// t
        t,
        /// nil
        nil,
        /// =
        assign,
        /// -=
        assign_sub,
        /// /=
        assign_div,
        /// *=
        assign_mul,
        /// +=
        assign_add,
        /// ==
        op_eq,
        /// >
        op_gt,
        /// >=
        op_geq,
        /// <
        op_lt,
        /// <=
        op_leq,
        /// /
        op_div,
        /// *
        op_mul,
        /// +
        op_add,
        /// -
        op_sub,
        /// ->
        op_acc,
        /// ~>
        op_derefacc,
        /// %
        op_mod,
        /// !
        op_not,
        /// !=
        op_neq,
        /// ||
        op_or,
        /// &&
        op_and,
        /// (
        list_l,
        /// '(
        list_lz,
        /// )
        list_r,
        /// @keys
        kw_keys,
        /// @rest
        kw_rest,
    };

    pub fn format(self: *const Token, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
        try writer.print("{d}:{d} .{s}", .{ self.loc.start, self.loc.end, @tagName(self.tag) });
    }
};

pub const Tokenizer = struct {
    buffer: [:0]const u8,
    index: usize,
    start: usize,

    const State = enum {
        start,
        alphanum_identifier,
        number_or_float,
        decimals,
        signed_exponent,
        unsigned_exponent,
        string,
        op_plus,
        op_minus,
        op_star,
        op_fslash,
        op_pipe,
        op_amp,
        op_excl,
        op_deref,
        op_eq,
        list_l,
        list_lz,
        list_r,
    };

    pub fn init(buf: [:0]const u8) Tokenizer {
        return .{
            .buffer = buf,
            .index = 0,
            .start = 0,
        };
    }

    pub fn next(self: *Tokenizer) ?Token {
        var state: State = .start;
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            const loc = Token.Loc{ .start = self.start, .end = self.index };
            state = switch (state) {
                .start => blk: {
                    self.start = self.index;
                    break :blk switch (c) {
                        'a'...'z', 'A'...'Z', '_' => .alphanum_identifier,
                        '0'...'9' => .number_or_float,
                        '.' => .decimals,
                        '"' => .string,
                        '+' => .op_plus,
                        '-' => .op_minus,
                        '*' => .op_star,
                        '/' => .op_fslash,
                        '|' => .op_pipe,
                        '&' => .op_amp,
                        '!' => .op_excl,
                        '~' => .op_deref,
                        '=' => .op_eq,
                        '(' => .list_l,
                        ')' => .list_r,
                        '\'' => .list_lz,
                        else => .start,
                    };
                },
                .alphanum_identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '0'...'9', '_' => .alphanum_identifier,
                    else => {
                        inline for (.{ Token.Tag.t, Token.Tag.nil }) |alphanum_tag| {
                            if (std.mem.eql(u8, self.buffer[self.start..self.index], @tagName(alphanum_tag))) {
                                return Token{ .tag = alphanum_tag, .loc = loc };
                            }
                        }
                        return Token{ .tag = .sym, .loc = loc };
                    },
                },
                .number_or_float => switch (c) {
                    '0'...'9' => .number_or_float,
                    '.' => .decimals,
                    'e' => .signed_exponent,
                    ' ', '\n' => {
                        return Token{ .tag = .num, .loc = loc };
                    },
                    else => unreachable,
                },
                .decimals => switch (c) {
                    '0'...'9' => .decimals,
                    ' ', '\n' => {
                        return Token{ .tag = .num, .loc = loc };
                    },
                    else => unreachable,
                },
                .signed_exponent => switch (c) {
                    '0'...'9', '+', '-' => .unsigned_exponent,
                    else => unreachable,
                },
                .unsigned_exponent => switch (c) {
                    '0'...'9' => .unsigned_exponent,
                    ' ', '\n' => {
                        return Token{ .tag = .num, .loc = loc };
                    },
                    else => unreachable,
                },
                .string => switch (c) {
                    '"' => {
                        return Token{ .tag = .str, .loc = loc };
                    },
                    '\\' => blk: {
                        self.index += 1;
                        break :blk .string;
                    },
                    else => .string,
                },
                .op_plus, .op_minus, .op_fslash, .op_star, .op_excl, .op_eq => switch (c) {
                    '=' => {
                        return Token{ .tag = switch (state) {
                            .op_plus => .assign_add,
                            .op_minus => .assign_sub,
                            .op_star => .assign_mul,
                            .op_fslash => .assign_div,
                            .op_excl => .op_neq,
                            .op_eq => .op_eq,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    ' ', '\n' => {
                        return Token{ .tag = switch (state) {
                            .op_plus => .op_add,
                            .op_minus => .op_sub,
                            .op_star => .op_mul,
                            .op_fslash => .op_div,
                            .op_excl => .op_not,
                            .op_eq => .assign,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    '>' => {
                        return Token{ .tag = switch (state) {
                            .op_minus => .op_acc,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_pipe => switch (c) {
                    '|' => {
                        return Token{ .tag = .op_or, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_amp => switch (c) {
                    '&' => {
                        return Token{ .tag = .op_and, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_deref => switch (c) {
                    '>' => {
                        return Token{ .tag = .op_derefacc, .loc = loc };
                    },
                    else => unreachable,
                },
                .list_l => {
                    return Token{ .tag = .list_l, .loc = loc };
                },
                .list_r => {
                    return Token{ .tag = .list_r, .loc = loc };
                },
                .list_lz => switch (c) {
                    '(' => {
                        return Token{ .tag = .list_lz, .loc = loc };
                    },
                    else => unreachable,
                },
            };
        }
        return null;
    }
};

test "simple tokenization" {
    const example =
        \\t
        \\nil
        \\a = b
        \\"some string w/ escaped\""
    ;

    var tokz = Tokenizer.init(example);
    try std.testing.expectEqual(Token{ .loc = .{ .start = 0, .end = 1 }, .tag = .t }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 2, .end = 5 }, .tag = .nil }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 6, .end = 7 }, .tag = .sym }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 8, .end = 9 }, .tag = .assign }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 10, .end = 11 }, .tag = .sym }, tokz.next());
    try std.testing.expectEqual(Token{ .loc = .{ .start = 12, .end = 37 }, .tag = .str }, tokz.next());
}