AcerecA 2025-01-16 20:27:36 +01:00
parent 56883c0bed
commit 9e5d32a420
10 changed files with 718 additions and 28 deletions


@@ -15,6 +15,9 @@
     // Once all dependencies are fetched, `zig build` no longer requires
     // internet connectivity.
     .dependencies = .{
+        .lsfw = .{
+            .path = "lib/lsfw",
+        },
         // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
         //.example = .{
         //    // When updating this field to a new URL, be sure to delete the corresponding


@@ -13,6 +13,7 @@ b_var->a
 ; some struff to do
 )
 "srting"
"wqdwd\"qwesfwf" "wqdwd\"qwesfwf"

src/classifier.zig (new file, 91 lines)

@@ -0,0 +1,91 @@
const tkz = @import("tokenize.zig");
const hlp = @import("helpers.zig");
const std = @import("std");
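/// Coarse syntactic category assigned to each token.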
pub const TokenClass = enum {
symbol,
string,
comment,
docstring,
number,
nil,
t,
list_start,
list_lazy_start,
list_end,
operator,
};
pub const ClassifiedToken = struct {
tok: tkz.Token,
cls: TokenClass,
};
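/// Operator spellings recognized by the classifier (used as a comptime string set; the value type is void).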
const operators = std.ComptimeStringMap(void, .{
.{"->"},
.{"~>"},
.{"/="},
.{"*="},
.{"-="},
.{"+="},
.{"||"},
.{"&&"},
.{"="},
.{"+"},
.{"-"},
.{"*"},
.{"/"},
.{"~"},
.{"%"},
.{"@keys"},
.{"@rest"},
});
const numbers = std.ComptimeStringMap(void, .{
.{"0"},
.{"1"},
.{"2"},
.{"3"},
.{"4"},
.{"5"},
.{"6"},
.{"7"},
.{"8"},
.{"9"},
});
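/// Maps a single token to its class by matching operators, list delimiters, a leading
/// quote or digit, the literals `nil` and `t`, and comment markers; anything else is a symbol.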
fn classify(tok: tkz.Token) ClassifiedToken {
return ClassifiedToken{
.tok = tok,
.cls = if (operators.has(tok.value))
TokenClass.operator
else if (std.mem.eql(u8, "'(", tok.value))
TokenClass.list_lazy_start
else if (std.mem.eql(u8, "(", tok.value))
TokenClass.list_start
else if (std.mem.eql(u8, ")", tok.value))
TokenClass.list_end
else if (std.mem.eql(u8, "\"", tok.value[0..1]))
TokenClass.string
else if (std.mem.eql(u8, "nil", tok.value))
TokenClass.nil
else if (std.mem.eql(u8, "t", tok.value))
TokenClass.t
else if (numbers.has(tok.value[0..1]))
TokenClass.number
else if (std.mem.eql(u8, ";", tok.value[0..1]))
if (tok.value.len >= 3 and std.mem.eql(u8, ";;;", tok.value[0..3])) TokenClass.docstring else TokenClass.comment
else
TokenClass.symbol,
};
}
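/// Classifies every token in `toks`; the caller owns the returned list.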
pub fn classifyTokens(toks: []const tkz.Token, allocator: std.mem.Allocator) !std.ArrayList(ClassifiedToken) {
var ctoks = std.ArrayList(ClassifiedToken).init(allocator);
for (toks) |tok| {
try ctoks.append(classify(tok));
}
return ctoks;
}

src/helpers.zig (new file, 9 lines)

@@ -0,0 +1,9 @@
const std = @import("std");
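/// Returns true if `needle` starts with any of the prefixes in `haystack`.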
pub fn isPartOf(comptime T: type, haystack: [][]const T, needle: []const T) bool {
    for (haystack) |straw| {
        if (needle.len >= straw.len and std.mem.eql(T, straw, needle[0..straw.len])) {
            return true;
        }
    }
    return false;
}


@@ -2,17 +2,74 @@ const std = @import("std");
 const lsp_types = @import("lsfw/src/types.zig");
 const lsp = @import("lsfw/src/lsp.zig");
 const lsp_doc = @import("lsfw/src/document.zig");
+const lsp_log = @import("lsfw/src/logger.zig");
+const tkz = @import("tokenize.zig");
+const cls = @import("classifier.zig");
-const State = struct {};
+const State = struct { symbols: std.ArrayList(cls.ClassifiedToken) };
 const Lsp = lsp.Lsp(State);
+const Scope = enum { hi };
+fn handleHover(allocator: std.mem.Allocator, ctx: *Lsp.Context, pos: lsp_types.Position) ?[]const u8 {
+    if (null == ctx.state) {
+        lsp_log.notify(.info, "could not find token under cursor (at {})", .{pos});
+        return null;
+    } else if (0 == ctx.state.?.symbols.items.len) {
+        handleDocOpen(allocator, ctx);
+    }
+    lsp_log.notify(.err, "{}", .{ctx.state.?.symbols});
+    // for (ctx.state.?.symbols.items) |tok| {
+    //     if (tok.tok.line == pos.line and tok.tok.char <= pos.character and (tok.tok.char + tok.tok.value.len) >= pos.character) {
+    //         lsp_log.notify(.info, "{}", .{tok});
+    //         break;
+    //     }
+    // }
-fn handleHover(allocator: std.mem.Allocator, context: *Lsp.Context, position: lsp_types.Position) ?[]const u8 {
-    _ = allocator;
-    _ = position;
-    _ = context;
     return null;
 }
+fn handleCompletion(allocator: std.mem.Allocator, context: *Lsp.Context, position: lsp_types.Position) ?lsp_types.CompletionList {
+    _ = context;
+    _ = position;
+    var completions = std.ArrayList(lsp_types.CompletionItem).init(allocator);
+    completions.append(.{
+        .label = "(procedure)",
+        .insertText = "(procedure ${1:func_name}($2)\n\n)",
+        .insertTextFormat = .Snippet,
+        .kind = .Function,
+    }) catch return null;
+    return .{ .items = completions.items };
+}
+fn handleDocOpen(allocator: std.mem.Allocator, context: *Lsp.Context) void {
+    lsp_log.notify(.err, "opened doc {s}", .{context.document.uri});
+    const content = context.document.text;
+    const toks = tkz.tokenizeContent(content, allocator) catch unreachable;
+    // const toks = std.ArrayList(tkz.Token).init(allocator);
+    lsp_log.notify(.err, "toks {}", .{toks});
+    // defer toks.deinit();
+    const ctoks = cls.classifyTokens(toks.items, allocator) catch unreachable;
+    lsp_log.notify(.err, "ctoks {}", .{ctoks});
+    // defer ctoks.deinit();
+    // const ast = try stx.generateSyntaxTree(ctoks);
+    lsp_log.notify(.info, "opened {s}, found {d} tokens", .{ context.document.uri, ctoks.items.len });
+    if (context.state != null) {
+        context.state.?.symbols.deinit();
+    }
+    context.state = .{
+        .symbols = ctoks,
+    };
+}
+fn handleDocChanged(allocator: std.mem.Allocator, context: *Lsp.Context, _: []lsp_types.ChangeEvent) void {
+    handleDocOpen(allocator, context);
+}
+fn handleDocClose(_: std.mem.Allocator, _: *Lsp.Context) void {}
 pub fn start() !u8 {
     const descr = lsp_types.ServerData{
         .serverInfo = .{
@@ -21,8 +78,13 @@ pub fn start() !u8 {
         },
     };
-    var server = Lsp.init(std.heap.page_allocator, descr);
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    var server = Lsp.init(gpa.allocator(), descr);
     server.registerHoverCallback(handleHover);
+    server.registerCompletionCallback(handleCompletion);
+    server.registerDocOpenCallback(handleDocOpen);
+    server.registerDocChangeCallback(handleDocChanged);
+    server.registerDocCloseCallback(handleDocClose);
     return server.start();
 }


@@ -1,18 +1,35 @@
 const std = @import("std");
-const tkz = @import("tokenize.zig");
+const tkz = @import("tokenizer.zig");
+// const cls = @import("classifier.zig");
+// const stx = @import("syntax.zig");
+const lsp = @import("lsp.zig");
 pub fn main() !void {
-    var file = try std.fs.cwd().openFile("data/example.il", .{});
+    // var file = try std.fs.cwd().openFile("data/example.il", .{});
-    defer file.close();
+    // defer file.close();
+    //
-    const content = try file.readToEndAlloc(std.heap.page_allocator, 4096 * ((1 << 10) << 10));
+    // const content = try file.readToEndAlloc(std.heap.page_allocator, 4096 * ((1 << 10) << 10));
+    //
-    const toks = try tkz.tokenizeContent(content);
+    // const toks = try tkz.tokenizeContent(content);
-    for (toks.items) |tok| {
+    // // for (toks.items) |tok| {
-        std.debug.print("{}:{} `{s}`\n", .{
+    // //     std.debug.print("{}:{} `{s}`\n", .{
-            tok.line,
+    // //         tok.line,
-            tok.char,
+    // //         tok.char,
-            tok.value,
+    // //         tok.value,
-        });
+    // //     });
-    }
+    // // }
+    //
+    // const ctoks = try cls.classifyTokens(toks);
+    // // for (ctoks.items) |ctok| {
+    // //     std.debug.print("{}:{}\t`{s:<40}`({})\n", .{
+    // //         ctok.tok.line,
+    // //         ctok.tok.char,
+    // //         ctok.tok.value,
+    // //         ctok.cls,
+    // //     });
+    // // }
+    // const ast = try stx.generateSyntaxTree(ctoks);
+    // std.debug.print("{}\n", .{ast});
+    //
+    _ = try lsp.start();
 }

src/parser.zig (new file, 178 lines)

@@ -0,0 +1,178 @@
const std = @import("std");
const toks = @import("tokenizer.zig");
pub const ParseError = error{ no_fn_name, no_fn_params };
pub const Tag = enum {
///expression
///`<rhs...>`
///
///lhs ignored
expr,
///variable assignment
///`<lhs> = <rhs...>`
///
///lhs is overwritten to be variable
var_assign,
///lazy evaluated list
///`'(<rhs...>)`
///
///lhs ignored
llist,
///list (evaluated)
///`(<lhs> <rhs...>)`
///
///lhs needs to be a callable
list_eval,
///fn_def (procedure)
///`;;; <lhs>
///(procedure <main_token>(<lhs>) <rhs...>)`
fn_def,
};
pub const Node = struct {
tag: Tag,
main_token: Index,
data: Data,
pub const Data = struct {
lhs: Index,
rhs: Index,
};
pub const Index = u32;
};
pub const AstError = error{};
pub const Parser = struct {
gpa: std.mem.Allocator,
source: [:0]const u8,
token_tags: []const toks.Token.Tag,
token_locs: []const toks.Token.Loc,
tok_i: Node.Index,
errs: std.ArrayList(AstError),
nodes: std.MultiArrayList(Node),
extra_data: std.ArrayList(Node.Index),
scratch: std.ArrayList(Node.Index),
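    /// Creates a parser over `buffer`, borrowing the token tags and locations
    /// from the tokenizer's MultiArrayList.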
pub fn init(buffer: [:0]const u8, mal: std.MultiArrayList(toks.Token), allocator: std.mem.Allocator) !Parser {
return .{
.gpa = allocator,
.source = buffer,
.token_tags = mal.items(.tag),
.token_locs = mal.items(.loc),
.tok_i = 0,
.errs = std.ArrayList(AstError).init(allocator),
.nodes = std.MultiArrayList(Node){},
.extra_data = std.ArrayList(Node.Index).init(allocator),
.scratch = std.ArrayList(Node.Index).init(allocator),
};
}
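    /// Returns the token at the current position plus `offset` if it has the expected tag.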
fn hasToken(self: *Parser, expected: toks.Token.Tag, offset: isize) ?toks.Token {
if (self.token_tags[@intCast(self.tok_i + offset)] == expected) {
return .{ .loc = self.token_locs[@intCast(self.tok_i + offset)], .tag = self.token_tags[@intCast(self.tok_i + offset)] };
}
return null;
}
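    /// Consumes the current token and returns its index if it matches `expected`.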
fn eatToken(self: *Parser, expected: toks.Token.Tag) ?Node.Index {
const tok = self.hasToken(expected, 0);
if (tok != null) {
self.tok_i += 1;
return self.tok_i - 1;
}
return null;
}
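    /// Parses a `procedure` definition: records the name, then skips over the
    /// parameter list and the body, returning a `fn_def` node.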
fn parse_fn_proc(self: *Parser) ?Node {
_ = self.eatToken(.sym);
if (self.hasToken(.list_l, -2) != null) {
// lisp style
} else if (self.eatToken(.list_l) != null) {
// c style
} else {
// not a procedure call or invalid syntax?
}
const name = self.eatToken(.sym) orelse return null;
std.debug.print("found procedure def for `{s}`", .{self.source[self.token_locs[name].start..self.token_locs[name].end]});
_ = self.eatToken(.list_l) orelse return null;
var open_lists: usize = 0;
        while (self.tok_i < self.token_tags.len) : (self.tok_i += 1) {
switch (self.token_tags[self.tok_i]) {
.list_l, .list_lz => {
open_lists += 1;
},
.list_r => {
if (open_lists > 0) {
open_lists -= 1;
} else {
break;
}
},
else => {},
}
        }
        // step past the ')' that closed the parameter list before skipping the body
        self.tok_i += 1;
        while (self.tok_i < self.token_tags.len) : (self.tok_i += 1) {
switch (self.token_tags[self.tok_i]) {
.list_l, .list_lz => {
open_lists += 1;
},
.list_r => {
if (open_lists > 0) {
open_lists -= 1;
} else {
break;
}
},
else => {},
}
}
self.tok_i += 1;
return Node{ .tag = .fn_def, .main_token = name, .data = .{ .lhs = 0, .rhs = 0 } };
}
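    /// Scans forward and returns the next recognized top-level construct;
    /// currently only `procedure` definitions are handled.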
pub fn next(self: *Parser) ?Node {
while (self.tok_i < self.token_tags.len) : (self.tok_i += 1) {
switch (self.token_tags[self.tok_i]) {
toks.Token.Tag.sym => {
if (std.mem.eql(u8, "procedure", self.source[self.token_locs[self.tok_i].start..self.token_locs[self.tok_i].end])) {
return self.parse_fn_proc();
}
},
else => {},
}
}
return null;
}
};
test "parsing of simple example" {
const example =
\\t
\\nil
\\a = b
\\"some string w/ escaped\""
\\(procedure a() )
;
var tokz = toks.Tokenizer.init(example);
var tokens = std.MultiArrayList(toks.Token){};
defer tokens.deinit(std.testing.allocator);
while (tokz.next()) |tok| {
try tokens.append(std.testing.allocator, tok);
std.debug.print("{}\n", .{tok});
}
var parse = try Parser.init(example, tokens, std.testing.allocator);
while (parse.next()) |ast_node| {
std.debug.print("{}\n", .{ast_node});
}
}

src/syntax.zig (new file, 55 lines)

@@ -0,0 +1,55 @@
const std = @import("std");
const cls = @import("classifier.zig");
pub const SyntaxNode = struct {
ctok: cls.ClassifiedToken,
nodes: ?std.ArrayList(SyntaxNode),
};
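/// Builds a tree of nested list nodes from the classified token stream; comments,
/// docstrings and tokens outside of any list become top-level leaves.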
pub fn generateSyntaxTree(ctoks: std.ArrayList(cls.ClassifiedToken)) !std.ArrayList(SyntaxNode) {
var nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
var actives = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
for (ctoks.items) |ctok| {
switch (ctok.cls) {
cls.TokenClass.comment, cls.TokenClass.docstring => {
try nodes.append(.{
.ctok = ctok,
.nodes = null,
});
},
cls.TokenClass.list_start, cls.TokenClass.list_lazy_start => {
try actives.append(.{
.ctok = ctok,
.nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator),
});
},
            cls.TokenClass.list_end => {
                // close the innermost open list; attach it to its parent if one is
                // still open, otherwise it becomes a top-level node
                if (actives.popOrNull()) |finished| {
                    if (actives.items.len > 0) {
                        try actives.items[actives.items.len - 1].nodes.?.append(finished);
                    } else {
                        try nodes.append(finished);
                    }
                } else {
                    std.debug.print("unmatched list end: {}\n", .{ctok});
                }
            },
            else => {
                // attach the token to the innermost open list without popping it;
                // tokens outside of any list become top-level leaves
                if (actives.items.len > 0) {
                    const active = &actives.items[actives.items.len - 1];
                    if (active.nodes == null) {
                        active.nodes = std.ArrayList(SyntaxNode).init(std.heap.page_allocator);
                    }
                    try active.nodes.?.append(.{
                        .ctok = ctok,
                        .nodes = null,
                    });
                } else {
                    try nodes.append(.{
                        .ctok = ctok,
                        .nodes = null,
                    });
                }
            },
}
}
return nodes;
}


@@ -1,6 +1,7 @@
 const std = @import("std");
+const lsp = @import("lsfw/src/lsp.zig");
-const Token = struct {
+pub const Token = struct {
     /// 0-based index of token start in whole file
     start: usize,
     /// 1-based line number the token starts at
@@ -13,9 +14,10 @@ const Token = struct {
 const TokenizationError = error{InvalidKeyword};
-pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
+pub fn tokenizeContent(content: []u8, allocator: std.mem.Allocator) !std.ArrayList(Token) {
-    var toks = std.ArrayList(Token).init(std.heap.page_allocator);
+    var toks = std.ArrayList(Token).init(allocator);
-    var lines = std.ArrayList(usize).init(std.heap.page_allocator);
+    var lines = std.ArrayList(usize).init(allocator);
+    defer lines.deinit();
     var index: usize = 0;
     while (index < content.len) {
@@ -68,7 +70,7 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
             if (std.mem.eql(u8, "@keys", content[index .. index + l])) {} else if (std.mem.eql(u8, "@rest", content[index .. index + l])) {} else {
                 std.debug.print("line={d}, char={d}\n", .{
-                    .line = lines.items.len + 1,
+                    .line = lines.items.len,
                     .char = switch (lines.items.len) {
                         0 => index,
                         else => index - lines.items[lines.items.len - 1],
@@ -83,8 +85,8 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
         };
         try toks.append(.{
             .start = index,
-            .value = content[index .. index + l],
+            .value = try allocator.dupe(u8, content[index .. index + l]),
-            .line = lines.items.len + 1,
+            .line = lines.items.len,
             .char = switch (lines.items.len) {
                 0 => index,
                 else => index - lines.items[lines.items.len - 1],
@@ -92,6 +94,6 @@ pub fn tokenizeContent(content: []u8) !std.ArrayList(Token) {
         });
         index += l;
     }
+    lsp.logger.notify(.err, "done with initial tokenization, generated {d} tokens", .{toks.items.len});
     return toks;
 }

src/tokenizer.zig (new file, 272 lines)

@@ -0,0 +1,272 @@
const std = @import("std");
pub const Token = struct {
tag: Tag,
loc: Loc,
pub const Loc = struct {
start: usize,
end: usize,
};
pub const Tag = enum {
sym,
num,
str,
/// t
t,
/// nil
nil,
/// =
assign,
/// -=
assign_sub,
/// /=
assign_div,
/// *=
assign_mul,
/// +=
assign_add,
/// ==
op_eq,
/// >
op_gt,
/// >=
op_geq,
/// <
op_lt,
/// <=
op_leq,
/// /
op_div,
/// *
op_mul,
/// +
op_add,
/// -
op_sub,
/// ->
op_acc,
/// ~>
op_derefacc,
/// %
op_mod,
/// !
op_not,
/// !=
op_neq,
/// ||
op_or,
/// &&
op_and,
/// (
list_l,
/// '(
list_lz,
/// )
list_r,
/// @keys
kw_keys,
/// @rest
kw_rest,
};
pub fn format(self: *const Token, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
try writer.print("{d}:{d} .{s}", .{ self.loc.start, self.loc.end, @tagName(self.tag) });
}
};
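/// Single-pass, state-machine tokenizer over a sentinel-terminated buffer.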
pub const Tokenizer = struct {
buffer: [:0]const u8,
index: usize,
start: usize,
const State = enum {
start,
alphanum_identifier,
number_or_float,
decimals,
signed_exponent,
unsigned_exponent,
string,
op_plus,
op_minus,
op_star,
op_fslash,
op_pipe,
op_amp,
op_excl,
op_deref,
op_eq,
list_l,
list_lz,
list_r,
};
pub fn init(buf: [:0]const u8) Tokenizer {
return .{
.buffer = buf,
.index = 0,
.start = 0,
};
}
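    /// Returns the next token in the buffer, or null once the input is exhausted.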
pub fn next(self: *Tokenizer) ?Token {
var state: State = .start;
while (self.index < self.buffer.len) : (self.index += 1) {
const c = self.buffer[self.index];
const loc = Token.Loc{ .start = self.start, .end = self.index };
state = switch (state) {
.start => blk: {
self.start = self.index;
break :blk switch (c) {
'a'...'z', 'A'...'Z', '_' => .alphanum_identifier,
'0'...'9' => .number_or_float,
'.' => .decimals,
'"' => .string,
'+' => .op_plus,
'-' => .op_minus,
'*' => .op_star,
'/' => .op_fslash,
'|' => .op_pipe,
'&' => .op_amp,
'!' => .op_excl,
'~' => .op_deref,
'=' => .op_eq,
'(' => .list_l,
')' => .list_r,
'\'' => .list_lz,
else => .start,
};
},
.alphanum_identifier => switch (c) {
'a'...'z', 'A'...'Z', '0'...'9', '_' => .alphanum_identifier,
else => {
inline for (.{ Token.Tag.t, Token.Tag.nil }) |alphanum_tag| {
if (std.mem.eql(u8, self.buffer[self.start..self.index], @tagName(alphanum_tag))) {
return Token{ .tag = alphanum_tag, .loc = loc };
}
}
return Token{ .tag = .sym, .loc = loc };
},
},
.number_or_float => switch (c) {
'0'...'9' => .number_or_float,
'.' => .decimals,
'e' => .signed_exponent,
' ', '\n' => {
return Token{ .tag = .num, .loc = loc };
},
else => unreachable,
},
.decimals => switch (c) {
'0'...'9' => .decimals,
' ', '\n' => {
return Token{ .tag = .num, .loc = loc };
},
else => unreachable,
},
.signed_exponent => switch (c) {
'0'...'9', '+', '-' => .unsigned_exponent,
else => unreachable,
},
.unsigned_exponent => switch (c) {
'0'...'9' => .unsigned_exponent,
' ', '\n' => {
return Token{ .tag = .num, .loc = loc };
},
else => unreachable,
},
.string => switch (c) {
'"' => {
return Token{ .tag = .str, .loc = loc };
},
'\\' => blk: {
self.index += 1;
break :blk .string;
},
else => .string,
},
                .op_plus, .op_minus, .op_fslash, .op_star, .op_excl, .op_eq => switch (c) {
                    '=' => {
                        // consume the second character of the two-character operator
                        self.index += 1;
                        return Token{ .tag = switch (state) {
                            .op_plus => .assign_add,
                            .op_minus => .assign_sub,
                            .op_star => .assign_mul,
                            .op_fslash => .assign_div,
                            .op_excl => .op_neq,
                            .op_eq => .op_eq,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    ' ', '\n' => {
                        return Token{ .tag = switch (state) {
                            .op_plus => .op_add,
                            .op_minus => .op_sub,
                            .op_star => .op_mul,
                            .op_fslash => .op_div,
                            .op_excl => .op_not,
                            .op_eq => .assign,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    '>' => {
                        // consume the '>' of '->'
                        self.index += 1;
                        return Token{ .tag = switch (state) {
                            .op_minus => .op_acc,
                            else => unreachable,
                        }, .loc = loc };
                    },
                    else => unreachable,
                },
                // the second character of '||', '&&' and '~>' is consumed so it does not
                // start a new token on the next call
                .op_pipe => switch (c) {
                    '|' => {
                        self.index += 1;
                        return Token{ .tag = .op_or, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_amp => switch (c) {
                    '&' => {
                        self.index += 1;
                        return Token{ .tag = .op_and, .loc = loc };
                    },
                    else => unreachable,
                },
                .op_deref => switch (c) {
                    '>' => {
                        self.index += 1;
                        return Token{ .tag = .op_derefacc, .loc = loc };
                    },
                    else => unreachable,
                },
.list_l => {
return Token{ .tag = .list_l, .loc = loc };
},
.list_r => {
return Token{ .tag = .list_r, .loc = loc };
},
                .list_lz => switch (c) {
                    '(' => {
                        // the token is the `'(` lazy-list opener: tag it as list_lz (not op_derefacc)
                        // and consume the '(' so it is not emitted again as a plain list_l
                        self.index += 1;
                        return Token{ .tag = .list_lz, .loc = loc };
                    },
                    else => unreachable,
                },
};
}
        // if the buffer ended while a token was still being built, emit it instead of dropping it
        const loc = Token.Loc{ .start = self.start, .end = self.index };
        return switch (state) {
            .alphanum_identifier => blk: {
                inline for (.{ Token.Tag.t, Token.Tag.nil }) |alphanum_tag| {
                    if (std.mem.eql(u8, self.buffer[self.start..self.index], @tagName(alphanum_tag))) {
                        break :blk Token{ .tag = alphanum_tag, .loc = loc };
                    }
                }
                break :blk Token{ .tag = .sym, .loc = loc };
            },
            .number_or_float, .decimals, .unsigned_exponent => Token{ .tag = .num, .loc = loc },
            .list_l => Token{ .tag = .list_l, .loc = loc },
            .list_r => Token{ .tag = .list_r, .loc = loc },
            else => null,
        };
}
};
test "simple tokenization" {
const example =
\\t
\\nil
\\a = b
\\"some string w/ escaped\""
;
var tokz = Tokenizer.init(example);
try std.testing.expectEqual(Token{ .loc = .{ .start = 0, .end = 1 }, .tag = .t }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 2, .end = 5 }, .tag = .nil }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 6, .end = 7 }, .tag = .sym }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 8, .end = 9 }, .tag = .assign }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 10, .end = 11 }, .tag = .sym }, tokz.next());
try std.testing.expectEqual(Token{ .loc = .{ .start = 12, .end = 37 }, .tag = .str }, tokz.next());
}