From f6fca262c6c66c6f40a24b13d491481d9e8b1e14 Mon Sep 17 00:00:00 2001
From: acereca
Date: Fri, 10 Mar 2023 20:29:10 +0100
Subject: [PATCH] initial commit

---
 .gitignore        |   1 +
 Cargo.lock        |  42 ++++++++++++++++++
 Cargo.toml        |   9 ++++
 src/main.rs       |  29 +++++++++++++
 src/states.rs     | 108 ++++++++++++++++++++++++++++++++++++++++++++++
 test/data/test.il |  11 +++++
 6 files changed, 200 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 src/main.rs
 create mode 100644 src/states.rs
 create mode 100644 test/data/test.il

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..b0812d5
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,42 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "0.7.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "memchr"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+
+[[package]]
+name = "regex"
+version = "1.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
+
+[[package]]
+name = "skill-oxide"
+version = "0.1.0"
+dependencies = [
+ "regex",
+]
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..e7b6704
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "skill-oxide"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+regex = "1"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..3770e92
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,29 @@
+use std::{env, fs, process};
+use regex::Regex;
+use states::Tokenizer;
+
+mod states;
+
+/// Tokenizes the file named by the first command-line argument and prints a state trace.
+fn main() {
+    let args: Vec<String> = env::args().collect();
+    // NOTE(review): the capture-group names were missing and are assumed to be
+    // `comment`/`word`; the pattern is compiled but not used by the tokenizer yet.
+    let re = Regex::new(r"((?P<comment>;)|(?P<word>\b\w+\b))").unwrap();
+
+    // Exit with a usage hint instead of panicking on a missing argument.
+    let path = match args.get(1) {
+        Some(path) => path,
+        None => {
+            eprintln!("usage: skill-oxide <file>");
+            process::exit(2);
+        }
+    };
+    let data = fs::read_to_string(path).unwrap_or_else(|err| {
+        eprintln!("cannot read {}: {}", path, err);
+        process::exit(1);
+    });
+
+    let mut tokenizer = Tokenizer::new();
+    tokenizer.read_in(data);
+}
diff --git a/src/states.rs b/src/states.rs
new file mode 100644
index 0000000..d1c49df
--- /dev/null
+++ b/src/states.rs
@@ -0,0 +1,108 @@
+/// Lexical context the cursor is inside while scanning the source text.
+#[derive(Debug)]
+pub enum CursorState {
+    /// Inside a `;` comment, which runs to the end of the line.
+    Comment,
+    /// Inside a parenthesized list.
+    List,
+    /// Inside an identifier that started with an alphabetic character.
+    Token,
+    /// Inside a literal that started with a digit.
+    Literal,
+    /// Inside a double-quoted string literal.
+    StringLiteral,
+    /// Declared for operators; the tokenizer never pushes it yet.
+    Operator,
+}
+
+/// A lexed token; declared for later use, the tokenizer does not build any yet.
+pub struct Token {
+    typ: CursorState,
+    // NOTE(review): the element type was missing; nested tokens are assumed -- confirm.
+    content: Vec<Token>,
+}
+
+/// Character-driven state machine that tracks nesting of comments, lists, tokens and literals.
+pub struct Tokenizer {
+    state_stack: Vec<CursorState>,
+    token_tree: Vec<Token>,
+}
+
+impl Tokenizer {
+    /// Creates a tokenizer with an empty state stack and an empty token tree.
+    pub fn new() -> Tokenizer {
+        Tokenizer {
+            state_stack: Vec::new(),
+            token_tree: Vec::new(),
+        }
+    }
+
+    /// Advances the state machine by one character.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `c` appears at top level and cannot start a comment, list or symbol.
+    fn match_char(&mut self, c: char) {
+        match self.state_stack.last() {
+            None => match c {
+                ';' => self.state_stack.push(CursorState::Comment),
+                '(' => self.state_stack.push(CursorState::List),
+                c if c.is_whitespace() || c.is_alphanumeric() => {}
+                _ => panic!("not a comment, list or symbol: {:?}", c),
+            },
+            Some(CursorState::Comment) => {
+                // Only a line break ends a comment; tabs and other control characters
+                // are part of its text.
+                if c == '\n' || c == '\r' {
+                    self.state_stack.pop();
+                }
+            }
+            Some(CursorState::List) => match c {
+                '(' => self.state_stack.push(CursorState::List),
+                ')' => {
+                    self.state_stack.pop();
+                }
+                ';' => self.state_stack.push(CursorState::Comment),
+                '"' => self.state_stack.push(CursorState::StringLiteral),
+                c if c.is_alphabetic() => self.state_stack.push(CursorState::Token),
+                c if c.is_numeric() => self.state_stack.push(CursorState::Literal),
+                _ => {}
+            },
+            Some(CursorState::Token) => {
+                if !c.is_alphanumeric() {
+                    // The terminating character belongs to the enclosing state.
+                    self.state_stack.pop();
+                    self.match_char(c);
+                }
+            }
+            Some(CursorState::Literal) => {
+                if !(c.is_alphanumeric() || c == '.') {
+                    // Re-dispatch so that a `)` right after a number still closes its list.
+                    self.state_stack.pop();
+                    self.match_char(c);
+                }
+            }
+            Some(CursorState::StringLiteral) => {
+                // Whitespace and parentheses are ordinary characters inside a string.
+                if c == '"' {
+                    self.state_stack.pop();
+                }
+            }
+            Some(CursorState::Operator) => {}
+        }
+    }
+
+    /// Feeds every character of `content` through the state machine, printing one trace
+    /// line (character, stack depth, innermost state) per character.
+    pub fn read_in(&mut self, content: String) {
+        for c in content.chars() {
+            self.match_char(c);
+            println!(
+                "{} -> {}: {:?}",
+                c,
+                self.state_stack.len(),
+                self.state_stack.last()
+            );
+        }
+    }
+}
diff --git a/test/data/test.il b/test/data/test.il
new file mode 100644
index 0000000..35517e6
--- /dev/null
+++ b/test/data/test.il
@@ -0,0 +1,11 @@
+;comment with some words
+
+(function1 arg1 arg2
+    (a b c)
+    cstyle( arg3)
+
+    var1 = "string"
+    var2 = 1
+    var3 = nil
+    var4 = t
+)
\ No newline at end of file