add tokenizer
This commit is contained in:
parent
78b6dfbcee
commit
6110b248d1
30
src/main.zig
30
src/main.zig
|
@ -1,24 +1,18 @@
|
|||
const std = @import("std");
|
||||
const tkz = @import("tokenize.zig");
|
||||
|
||||
pub fn main() !void {
|
||||
// Prints to stderr (it's a shortcut based on `std.io.getStdErr()`)
|
||||
std.debug.print("All your {s} are belong to us.\n", .{"codebase"});
|
||||
var file = try std.fs.cwd().openFile("data/example.il", .{});
|
||||
defer file.close();
|
||||
|
||||
// stdout is for the actual output of your application, for example if you
|
||||
// are implementing gzip, then only the compressed bytes should be sent to
|
||||
// stdout, not any debugging messages.
|
||||
const stdout_file = std.io.getStdOut().writer();
|
||||
var bw = std.io.bufferedWriter(stdout_file);
|
||||
const stdout = bw.writer();
|
||||
const content = try file.readToEndAlloc(std.heap.page_allocator, 4096 * ((1 << 10) << 10));
|
||||
|
||||
try stdout.print("Run `zig build test` to run the tests.\n", .{});
|
||||
|
||||
try bw.flush(); // don't forget to flush!
|
||||
}
|
||||
|
||||
test "simple test" {
|
||||
var list = std.ArrayList(i32).init(std.testing.allocator);
|
||||
defer list.deinit(); // try commenting this out and see if zig detects the memory leak!
|
||||
try list.append(42);
|
||||
try std.testing.expectEqual(@as(i32, 42), list.pop());
|
||||
const toks = try tkz.tokenizeContent(content);
|
||||
for (toks.items) |tok| {
|
||||
std.debug.print("{}:{} `{s}`\n", .{
|
||||
tok.line,
|
||||
tok.char,
|
||||
tok.value,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
const std = @import("std");
|
||||
|
||||
/// A single lexical token produced by `tokenizeContent`.
///
/// Public so that code importing this file can name the element type of the
/// list returned by `tokenizeContent`.
pub const Token = struct {
    /// 0-based byte index of the token's first byte in the whole input.
    start: usize,
    /// 1-based number of the line the token starts on.
    line: usize,
    /// 1-based position of the token's first byte within its line.
    char: usize,

    /// The token text. This is a slice of the buffer that was tokenized, not
    /// a copy, so it is only valid while that buffer is alive.
    value: []const u8,
};
|
||||
|
||||
/// Error set declared for tokenizer failures.
/// NOTE(review): nothing visible in this file returns `InvalidKeyword`;
/// presumably it is meant for unrecognised `@` keywords — confirm before use.
const TokenizationError = error{InvalidKeyword};
|
||||
|
||||
/// Returns true for bytes that may continue an identifier or `@keyword`.
fn isIdentChar(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '0'...'9', '_' => true,
        else => false,
    };
}

/// Returns true for bytes that may continue a numeric literal.
fn isNumberChar(c: u8) bool {
    return switch (c) {
        '0'...'9', '.', 'e' => true,
        else => false,
    };
}

/// Returns true for every byte except a line feed.
fn isNotNewline(c: u8) bool {
    return c != '\n';
}

/// Returns the index of the first byte at or after `from` for which `pred`
/// is false, or `content.len` when the input ends first.
fn skipWhile(content: []const u8, from: usize, comptime pred: fn (u8) bool) usize {
    var i = from;
    while (i < content.len and pred(content[i])) : (i += 1) {}
    return i;
}

/// Returns the index one past the closing quote of the string literal whose
/// opening quote is at `open`. A backslash escapes the byte that follows it.
/// An unterminated literal extends to the end of the input.
fn stringEnd(content: []const u8, open: usize) usize {
    var i = open + 1;
    while (i < content.len) {
        const c = content[i];
        if (c == '\\') {
            // Skip the escaped byte so `\"` and `\\` are both handled.
            i += 2;
            continue;
        }
        i += 1;
        if (c == '"') break;
    }
    // A trailing backslash can push `i` one past the end; clamp it.
    return if (i > content.len) content.len else i;
}

/// Returns 2 when `rest` starts with one of the two-byte operators, else 1.
fn operatorLen(rest: []const u8) usize {
    const two_byte_ops = [_][]const u8{ "->", "~>", "||", "&&", "/=", "*=", "+=", "-=", "'(" };
    for (two_byte_ops) |op| {
        if (std.mem.startsWith(u8, rest, op)) return 2;
    }
    return 1;
}

/// Splits `content` into tokens.
///
/// Token shapes:
///   * `;` up to (not including) the next line feed: a comment token
///   * `"` ... `"` with backslash escapes: a string token
///   * a letter or `_` followed by letters, digits and `_`: an identifier
///   * a digit followed by digits, `.` and `e`: a number
///   * one of the operator bytes, or one of the two-byte operators
///   * `@` followed by identifier bytes: a keyword token. Anything other
///     than `@keys` / `@rest` is reported on stderr but still emitted.
/// Every other byte is skipped.
///
/// Each token records its 0-based byte offset and its 1-based line and
/// column. `value` is a slice of `content`, so `content` must outlive the
/// returned tokens.
///
/// The returned list is allocated with `std.heap.page_allocator`; the caller
/// owns it and should call `deinit()` on it. Fails only if allocation fails.
pub fn tokenizeContent(content: []const u8) !std.ArrayList(Token) {
    var toks = std.ArrayList(Token).init(std.heap.page_allocator);
    errdefer toks.deinit();

    // Current 1-based line number and the index of that line's first byte.
    var line: usize = 1;
    var line_start: usize = 0;

    var index: usize = 0;
    while (index < content.len) {
        const char = content[index];
        const column = index - line_start + 1;

        // `end` is the index one past the last byte of the token at `index`.
        const end: usize = switch (char) {
            '\n' => {
                index += 1;
                line += 1;
                line_start = index;
                continue;
            },
            ';' => skipWhile(content, index + 1, isNotNewline),
            '"' => stringEnd(content, index),
            'a'...'z', 'A'...'Z', '_' => skipWhile(content, index + 1, isIdentChar),
            '0'...'9' => skipWhile(content, index + 1, isNumberChar),
            '+', '-', '~', '*', '/', '%', '<', '>', '=', '?', '|', '&', '(', ')', '\'' => index + operatorLen(content[index..]),
            '@' => blk: {
                const stop = skipWhile(content, index + 1, isIdentChar);
                const keyword = content[index..stop];
                const known = std.mem.eql(u8, keyword, "@keys") or std.mem.eql(u8, keyword, "@rest");
                if (!known) {
                    // Best-effort diagnostic; the token is still emitted.
                    std.debug.print("line={d}, char={d}\n", .{ line, column });
                }
                break :blk stop;
            },
            else => {
                index += 1;
                continue;
            },
        };

        try toks.append(.{
            .start = index,
            .line = line,
            .char = column,
            .value = content[index..end],
        });

        // Only string literals can span lines; keep line tracking in sync
        // so tokens after a multi-line string get correct positions.
        if (char == '"') {
            var i = index;
            while (i < end) : (i += 1) {
                if (content[i] == '\n') {
                    line += 1;
                    line_start = i + 1;
                }
            }
        }

        index = end;
    }

    return toks;
}
|
Loading…
Reference in New Issue