diff --git a/.vscode/launch.json b/.vscode/launch.json
index 3d70c12..ea00c1e 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -7,6 +7,14 @@
             "request": "launch",
             "program": "/home/patrick/git/skill-ls/.venv/bin/skillls",
             "python": "/home/patrick/git/skill-ls/.venv/bin/python"
+        },
+
+        {
+            "name": "main",
+            "type": "python",
+            "request": "launch",
+            "module": "skillls.parsing.iterative",
+            "python": "/home/patrick/git/skill-ls/.venv/bin/python"
         }
     ]
 }
diff --git a/examples/example.il b/examples/example.il
index 86973e4..6fb57eb 100644
--- a/examples/example.il
+++ b/examples/example.il
@@ -3,12 +3,13 @@
 example = nil
 example2 = example
 (
-
     example[qdqoifq]
     (let (some vars (default 0))
         ; ... some wall of text
         "))"
         wqdqwf = '(doqwf)
+        var = 1.3
+        var = 231
         qqvwv
     )
 )
diff --git a/skillls/main.py b/skillls/main.py
index 9fe4fa9..c98c77e 100644
--- a/skillls/main.py
+++ b/skillls/main.py
@@ -1,35 +1,21 @@
 from logging import INFO, basicConfig, getLogger
-from pathlib import Path
 from time import time
 
 from lsprotocol.types import (
-    TEXT_DOCUMENT_DIAGNOSTIC,
-    TEXT_DOCUMENT_DID_CHANGE,
     TEXT_DOCUMENT_DID_OPEN,
     TEXT_DOCUMENT_DID_SAVE,
     TEXT_DOCUMENT_DOCUMENT_SYMBOL,
-    TEXT_DOCUMENT_PUBLISH_DIAGNOSTICS,
     CompletionItem,
-    Diagnostic,
-    DiagnosticSeverity,
-    DidChangeTextDocumentParams,
     DidOpenTextDocumentParams,
     DidSaveTextDocumentParams,
-    DocumentDiagnosticParams,
-    DocumentDiagnosticReport,
     DocumentSymbol,
     DocumentSymbolParams,
-    Position,
-    Range,
-    RelatedFullDocumentDiagnosticReport,
 )
 from pygls.server import LanguageServer
-from parsimonious import Grammar, IncompleteParseError
 
-from skillls.parsing.iterative import IterativeParser
+from skillls.parsing.iterative import IterativeParser, TokenParser
 
 from .cache import Cache
-from .parsing.tokenize import Locator, SkillVisitor
 
 URI = str
 
@@ -41,52 +27,14 @@ logger = getLogger(__name__)
 server = LanguageServer("skillls", "v0.1")
 
 
-def parse(content: str):
-    path = Path(__file__).parent / "grammar.peg"
-    grammar = Grammar(path.read_text())
+@server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
+def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]:
+    # Evict the cached copy first so the lookup below re-reads the file.
+    server.workspace.remove_text_document(params.text_document.uri)
+    doc = server.workspace.get_text_document(params.text_document.uri)
+    t = TokenParser()
+    t.prepare_content(doc.source)
 
-    locator = Locator(content.split())
-    tree = grammar.parse(content)
-
-    iv = SkillVisitor(locator)
-    output = iv.visit(tree)
-
-    return output
-
-
-# @server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
-# def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]:
-#     logger.warning("requested document symbols for %s", params.text_document.uri)
-#     doc = server.workspace.get_text_document(params.text_document.uri)
-#     try:
-#         logger.warning(parse(doc.source))
-#     except IncompleteParseError as e:
-#         server.publish_diagnostics(
-#             params.text_document.uri,
-#             [
-#                 Diagnostic(
-#                     Range(
-#                         Position(e.line() - 1, e.column() - 1),
-#                         Position(len(doc.lines), 0),
-#                     ),
-#                     str(e),
-#                     severity=DiagnosticSeverity.Error,
-#                 )
-#             ],
-#         )
-#     return []
-
-
-# @server.feature(TEXT_DOCUMENT_DIAGNOSTIC)
-# def document_diagnostic(params: DocumentDiagnosticParams) -> DocumentDiagnosticReport:
-#     doc = server.workspace.get_text_document(params.text_document.uri)
-#     p = IterativeParser()
-#     # parsed = (e for e in parse(doc.source) if isinstance(e, DocumentSymbol))
-#     # diags = [
-#     #     Diagnostic(e.range, e.name, severity=DiagnosticSeverity.Error) for e in parsed
-#     # ]
-#     diags = p(doc.lines)
-#     return RelatedFullDocumentDiagnosticReport(diags)
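+    # The token tree already consists of DocumentSymbol instances, so it
+    # can be returned to the client as-is.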
+    return t._token_tree
 
 
 @server.feature(TEXT_DOCUMENT_DID_OPEN)
@@ -109,8 +57,3 @@ def on_save(params: DidSaveTextDocumentParams) -> None:
 
 def main():
     server.start_io()
-
-
-if __name__ == "__main__":
-    file = Path(__file__).parent.parent / "examples" / "example.il"
-    out = parse(file.read_text())
diff --git a/skillls/parsing/iterative.py b/skillls/parsing/iterative.py
index 0d3cdc1..f2e3635 100644
--- a/skillls/parsing/iterative.py
+++ b/skillls/parsing/iterative.py
@@ -1,9 +1,19 @@
+from abc import ABC
 from dataclasses import dataclass, field
-from enum import Enum, auto
+from enum import Enum
 from logging import getLogger
+import re
+from pathlib import Path
 from typing import NamedTuple, Self
 
-from lsprotocol.types import Diagnostic, DiagnosticSeverity, Position, Range
+from lsprotocol.types import (
+    Diagnostic,
+    DiagnosticSeverity,
+    DocumentSymbol,
+    Position,
+    Range,
+    SymbolKind,
+)
 
 logger = getLogger(__name__)
 
@@ -51,6 +61,156 @@ class StackElement(NamedTuple):
     elem: SyntaxPair
 
 
+WHITESPACE_OR_PAREN = re.compile(r"(\s|\(|\)|\[|\]|\'\()+")
+TOKEN_REGEX = re.compile(r"\w[a-zA-Z0-9_]*")
+NUMBER_REGEX = re.compile(r"\d+(\.\d+)?")
+OPERATORS = re.compile(r"(->|~>|\+|\-|\*|\/|\=)")
+
+
+@dataclass
+class TreeToken(ABC):
+    content: str
+    range: Range
+
+
+def String(content: str, range: Range) -> DocumentSymbol:
+    return DocumentSymbol(
+        name=content,
+        range=range,
+        kind=SymbolKind.String,
+        selection_range=range,
+    )
+
+
+def Operator(content: str, range: Range) -> DocumentSymbol:
+    return DocumentSymbol(
+        name=content,
+        range=range,
+        kind=SymbolKind.Operator,
+        selection_range=range,
+    )
+
+
+def Number(content: str, range: Range) -> DocumentSymbol:
+    return DocumentSymbol(
+        name=content,
+        range=range,
+        kind=SymbolKind.Number,
+        selection_range=range,
+    )
+
+
+def Token(content: str, range: Range) -> DocumentSymbol:
+    return DocumentSymbol(
+        name=content,
+        range=range,
+        kind=SymbolKind.Variable,
+        selection_range=range,
+    )
+
+
+RawIndex = int
+ColIndex = int
+LineIndex = int
+
+
+@dataclass
+class TokenParser:
+    _in_string: bool = False
+    _in_comment: bool = False
+    _token_tree: list[DocumentSymbol] = field(default_factory=list)
+    _current: str = ""
+    _line_indices: list[RawIndex] = field(default_factory=list)
+
+    def _get_line(self, index: RawIndex) -> tuple[LineIndex, RawIndex]:
+        # Map a raw offset to (line number, raw index of the line's first char).
+        if not self._line_indices:
+            return 0, 0
+        for line, newline_pos in enumerate(self._line_indices):
+            if index < newline_pos:
+                return line, self._line_indices[line - 1] + 1 if line > 0 else 0
+
+        return len(self._line_indices), self._line_indices[-1] + 1
+
+    def _get_range(self, start: RawIndex, end: RawIndex) -> Range:
+        start_line, start_line_start = self._get_line(start)
+        start_col = start - start_line_start
+
+        end_line, end_line_start = self._get_line(end)
+        end_col = end - end_line_start
+
+        return Range(Position(start_line, start_col), Position(end_line, end_col))
+
+    def _parse_string(self, raw: str, index: int) -> int:
+        # find() instead of index(): an unterminated string must not raise.
+        stop = raw.find('"', index + 1)
+        if stop == -1:
+            stop = len(raw) - 1
+        self._token_tree.append(
+            String(raw[index : stop + 1], self._get_range(index, stop))
+        )
+        return stop + 1
+
+    def _parse_comment(self, raw: str, index: int) -> int:
+        # A comment runs to the end of the line (or to the end of input).
+        stop = raw.find("\n", index)
+        if stop == -1:
+            stop = len(raw)
+        # self._token_tree.append(Comment(raw[index:stop], self._get_range(index, stop)))
+        return stop + 1
+
+    def _parse_whitespace(self, raw: str, index: int) -> int:
+        # match() anchors at index; search() could silently skip real tokens.
+        if m := WHITESPACE_OR_PAREN.match(raw, index):
+            stop = m.end()
+        else:
+            stop = index + 1
+
+        # self._token_tree.append(Whitespace(raw[index:stop]))
+        return stop
+
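+    # Shared contract: each _parse_* helper consumes one lexeme starting at
+    # `index` and returns the raw index of the first unconsumed character,
+    # from which prepare_content continues the scan.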
+    def _parse_operator(self, raw: str, index: int) -> int:
+        if m := OPERATORS.match(raw, index):
+            stop = m.end()
+        else:
+            stop = index + 1
+
+        self._token_tree.append(
+            Operator(raw[index:stop], self._get_range(index, stop - 1))
+        )
+        # Return stop itself, not stop + 1, or the scanner drops a character.
+        return stop
+
+    def _parse_token(self, raw: str, index: int) -> int:
+        if m := TOKEN_REGEX.match(raw, index):
+            stop = m.end()
+        else:
+            stop = index + 1
+
+        self._token_tree.append(
+            Token(raw[index:stop], self._get_range(index, stop - 1))
+        )
+        return stop
+
+    def _parse_number(self, raw: str, index: int) -> int:
+        if m := NUMBER_REGEX.match(raw, index):
+            stop = m.end()
+        else:
+            stop = index + 1
+
+        self._token_tree.append(
+            Number(raw[index:stop], self._get_range(index, stop - 1))
+        )
+        return stop
+
+    def prepare_content(self, raw: str) -> None:
+        self._line_indices = [i for i, char in enumerate(raw) if char == "\n"]
+        max_index = len(raw)
+        index = 0
+        while index < max_index:
+            if raw[index] == '"':
+                index = self._parse_string(raw, index)
+            elif raw[index] == ";":
+                index = self._parse_comment(raw, index)
+            elif WHITESPACE_OR_PAREN.match(raw[index : index + 2]):
+                index = self._parse_whitespace(raw, index)
+            # Match against the full input at `index` so two-character
+            # operators such as -> and ~> are recognized.
+            elif OPERATORS.match(raw, index):
+                index = self._parse_operator(raw, index)
+            elif NUMBER_REGEX.match(raw, index):
+                index = self._parse_number(raw, index)
+            else:
+                index = self._parse_token(raw, index)
+
 
 @dataclass()
 class IterativeParser:
     _stack: list[StackElement] = field(default_factory=list)
@@ -129,5 +289,8 @@
 
 
 if __name__ == "__main__":
-    p = IterativeParser()
-    print(p(["((([]]))"]))
+    example = Path(__file__).parent.parent.parent / "examples" / "example.il"
+
+    t = TokenParser()
+    t.prepare_content(example.read_text())
+    print(t._token_tree)
diff --git a/skillls/parsing/tokenize.py b/skillls/parsing/tokenize.py
index 8b1189c..8b13789 100644
--- a/skillls/parsing/tokenize.py
+++ b/skillls/parsing/tokenize.py
@@ -1,116 +1 @@
-from collections.abc import Iterable, Iterator
-from typing import Any, Sequence
-from lsprotocol.types import DocumentSymbol, Range, SymbolKind
-from parsimonious import ParseError
-from dataclasses import dataclass
-from parsimonious.nodes import Node, NodeVisitor
-
-from .location import Locator
-
-
-@dataclass(frozen=True)
-class BaseToken:
-    range: Range
-
-
-@dataclass(frozen=True)
-class Literal(BaseToken):
-    value: str | float | bool
-
-
-@dataclass(frozen=True)
-class Token(BaseToken):
-    value: str
-
-
-@dataclass(frozen=True)
-class List(BaseToken):
-    value: list[BaseToken]
-
-
-def flatten(xs: Iterable) -> Iterator[Any]:
-    for x in xs:
-        if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
-            yield from flatten(x)
-        else:
-            yield x
-
-
-@dataclass
-class SkillVisitor(NodeVisitor):
-    locator: Locator
-
-    def visit_skill(self, _: Node, visited_children: Sequence[Any]) -> list[BaseToken]:
-        return list(flatten(visited_children))
-
-    def visit_TOKEN(self, node: Node, _: Any) -> DocumentSymbol:
-        r = self.locator.locate(node)
-        print(r)
-        return DocumentSymbol(node.text, SymbolKind.Property, r, r)
-
-    def visit_LITERAL(self, node: Node, visited_children: list[None | Node]) -> Literal:
-        value, *_ = visited_children
-        if value:
-            match value.expr_name:
-                case "L_t":
-                    return Literal(self.locator.locate(node), True)
-                case "L_nil":
-                    return Literal(self.locator.locate(node), False)
-                case "L_num":
-                    return Literal(self.locator.locate(node), float(value.text))
-                case "L_string":
-                    return Literal(self.locator.locate(node), value.text)
-                case _:
-                    pass
-
-        raise ParseError("something went wrong during literal parsing")
ParseError("something went wrong during literal parsing") - - def visit_listraw( - self, node: Node, visited_children: list[list[list[Any]]] - ) -> List: - rest = visited_children[2] - - children = [] - - for child in rest: - for part in child: - if isinstance(part, BaseToken): - children.append(part) - - return List(self.locator.locate(node), children) - - def visit_listc(self, node: Node, visited_children: list[list[list[Any]]]) -> List: - rest = ([[visited_children[0]]], visited_children[2]) - - children = [] - - for child_list in rest: - for child in child_list: - for part in child: - if isinstance(part, BaseToken): - children.append(part) - - return List(self.locator.locate(node), children) - - def visit_listskill( - self, node: Node, visited_children: list[list[list[Any]]] - ) -> List: - rest = visited_children[1] - - children = [] - - for child in rest: - for part in child: - if isinstance(part, BaseToken): - children.append(part) - - return List(self.locator.locate(node), children) - - def visit_inline_assign(self, node: Node, visited_children: Sequence[Any]): - return visited_children or node - - def generic_visit( - self, node: Node, visited_children: Sequence[Any] - ) -> Node | Sequence[None | Node]: - return visited_children or node