complete simplest token parser and add document symbol lookup

This commit is contained in:
acereca 2023-10-21 22:05:37 +02:00
parent 28e137988f
commit 1d5d744f64
5 changed files with 185 additions and 185 deletions

8
.vscode/launch.json vendored
View File

@ -7,6 +7,14 @@
"request": "launch", "request": "launch",
"program": "/home/patrick/git/skill-ls/.venv/bin/skillls", "program": "/home/patrick/git/skill-ls/.venv/bin/skillls",
"python": "/home/patrick/git/skill-ls/.venv/bin/python" "python": "/home/patrick/git/skill-ls/.venv/bin/python"
},
{
"name": "main",
"type": "python",
"request": "launch",
"module": "skillls.parsing.iterative",
"python": "/home/patrick/git/skill-ls/.venv/bin/python"
} }
] ]
} }

View File

@ -3,12 +3,13 @@ example = nil
example2 = example example2 = example
( (
example[qdqoifq]
(let (some vars (default 0)) (let (some vars (default 0))
; ... some wall of text ; ... some wall of text
"))" "))"
wqdqwf = '(doqwf) wqdqwf = '(doqwf)
var = 1.3
var = 231
qqvwv qqvwv
) )
) )

View File

@ -1,35 +1,21 @@
from logging import INFO, basicConfig, getLogger from logging import INFO, basicConfig, getLogger
from pathlib import Path
from time import time from time import time
from lsprotocol.types import ( from lsprotocol.types import (
TEXT_DOCUMENT_DIAGNOSTIC,
TEXT_DOCUMENT_DID_CHANGE,
TEXT_DOCUMENT_DID_OPEN, TEXT_DOCUMENT_DID_OPEN,
TEXT_DOCUMENT_DID_SAVE, TEXT_DOCUMENT_DID_SAVE,
TEXT_DOCUMENT_DOCUMENT_SYMBOL, TEXT_DOCUMENT_DOCUMENT_SYMBOL,
TEXT_DOCUMENT_PUBLISH_DIAGNOSTICS,
CompletionItem, CompletionItem,
Diagnostic,
DiagnosticSeverity,
DidChangeTextDocumentParams,
DidOpenTextDocumentParams, DidOpenTextDocumentParams,
DidSaveTextDocumentParams, DidSaveTextDocumentParams,
DocumentDiagnosticParams,
DocumentDiagnosticReport,
DocumentSymbol, DocumentSymbol,
DocumentSymbolParams, DocumentSymbolParams,
Position,
Range,
RelatedFullDocumentDiagnosticReport,
) )
from pygls.server import LanguageServer from pygls.server import LanguageServer
from parsimonious import Grammar, IncompleteParseError
from skillls.parsing.iterative import IterativeParser from skillls.parsing.iterative import IterativeParser, TokenParser
from .cache import Cache from .cache import Cache
from .parsing.tokenize import Locator, SkillVisitor
URI = str URI = str
@ -41,52 +27,14 @@ logger = getLogger(__name__)
server = LanguageServer("skillls", "v0.1") server = LanguageServer("skillls", "v0.1")
def parse(content: str): @server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
path = Path(__file__).parent / "grammar.peg" def on_hover(params: DocumentSymbolParams) -> list[DocumentSymbol]:
grammar = Grammar(path.read_text()) server.workspace.remove_text_document(params.text_document.uri)
doc = server.workspace.get_text_document(params.text_document.uri)
t = TokenParser()
t.prepare_content(doc.source)
locator = Locator(content.split()) return t._token_tree
tree = grammar.parse(content)
iv = SkillVisitor(locator)
output = iv.visit(tree)
return output
# @server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
# def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]:
# logger.warning("requested document symbols for %s", params.text_document.uri)
# doc = server.workspace.get_text_document(params.text_document.uri)
# try:
# logger.warning(parse(doc.source))
# except IncompleteParseError as e:
# server.publish_diagnostics(
# params.text_document.uri,
# [
# Diagnostic(
# Range(
# Position(e.line() - 1, e.column() - 1),
# Position(len(doc.lines), 0),
# ),
# str(e),
# severity=DiagnosticSeverity.Error,
# )
# ],
# )
# return []
# @server.feature(TEXT_DOCUMENT_DIAGNOSTIC)
# def document_diagnostic(params: DocumentDiagnosticParams) -> DocumentDiagnosticReport:
# doc = server.workspace.get_text_document(params.text_document.uri)
# p = IterativeParser()
# # parsed = (e for e in parse(doc.source) if isinstance(e, DocumentSymbol))
# # diags = [
# # Diagnostic(e.range, e.name, severity=DiagnosticSeverity.Error) for e in parsed
# # ]
# diags = p(doc.lines)
# return RelatedFullDocumentDiagnosticReport(diags)
@server.feature(TEXT_DOCUMENT_DID_OPEN) @server.feature(TEXT_DOCUMENT_DID_OPEN)
@ -109,8 +57,3 @@ def on_save(params: DidSaveTextDocumentParams) -> None:
def main(): def main():
server.start_io() server.start_io()
if __name__ == "__main__":
file = Path(__file__).parent.parent / "examples" / "example.il"
out = parse(file.read_text())

View File

@ -1,9 +1,19 @@
from abc import ABC
from dataclasses import dataclass, field from dataclasses import dataclass, field
from enum import Enum, auto from enum import Enum
from logging import getLogger from logging import getLogger
import re
from pathlib import Path
from typing import NamedTuple, Self from typing import NamedTuple, Self
from lsprotocol.types import Diagnostic, DiagnosticSeverity, Position, Range from lsprotocol.types import (
Diagnostic,
DiagnosticSeverity,
DocumentSymbol,
Position,
Range,
SymbolKind,
)
logger = getLogger(__name__) logger = getLogger(__name__)
@ -51,6 +61,156 @@ class StackElement(NamedTuple):
elem: SyntaxPair elem: SyntaxPair
# Runs of whitespace, parentheses, brackets, or the quoted-list opener "'(" —
# everything the tokenizer skips over between emitted tokens.
WHITESPACE_OR_PAREN = re.compile(r"(\s|\(|\)|\[|\]|\'\()+")
# NOTE(review): \w also matches digits and "_", so this accepts identifiers
# that start with a digit — confirm that is intended.
TOKEN_REGEX = re.compile(r"\w[a-zA-Z0-9_]*")
# Integer or decimal numeric literal.
NUMBER_REGEX = re.compile(r"\d+(\.\d+)?")
# Operator tokens (including the two-character -> and ~> forms).
OPERATORS = re.compile(r"(->|~>|\+|\-|\*|\/|\=)")
@dataclass
class TreeToken(ABC):
    """Base record for a parsed token: its raw text plus document location.

    NOTE(review): declared as ABC but defines no abstract members, and the
    factory helpers below return DocumentSymbol instances rather than
    subclasses of this — confirm the class is still needed.
    """

    content: str  # raw source text of the token
    range: Range  # span of the token within the document
def String(content: str, range: Range) -> DocumentSymbol:
    """Build a DocumentSymbol of kind ``String`` covering *range*."""
    span = range
    return DocumentSymbol(
        name=content, kind=SymbolKind.String, range=span, selection_range=span
    )
def Operator(content: str, range: Range) -> DocumentSymbol:
    """Build a DocumentSymbol of kind ``Operator`` covering *range*."""
    return DocumentSymbol(
        name=content, kind=SymbolKind.Operator, range=range, selection_range=range
    )
def Number(content: str, range: Range) -> DocumentSymbol:
    """Build a DocumentSymbol of kind ``Number`` covering *range*."""
    where = range
    return DocumentSymbol(
        name=content, kind=SymbolKind.Number, range=where, selection_range=where
    )
def Token(content: str, range: Range) -> DocumentSymbol:
    """Build a DocumentSymbol of kind ``Variable`` for a bare identifier."""
    return DocumentSymbol(
        name=content, kind=SymbolKind.Variable, range=range, selection_range=range
    )
# Type aliases used by TokenParser: a RawIndex is an absolute character
# offset into the raw text; ColIndex and LineIndex are positions within
# the document as used by LSP Position objects.
RawIndex = int
ColIndex = int
LineIndex = int
@dataclass
class TokenParser:
    """Single-pass tokenizer for SKILL source.

    ``prepare_content`` walks the raw text once and appends a flat list of
    DocumentSymbol entries (strings, numbers, operators, identifiers) to
    ``_token_tree``.  Comments and whitespace/parentheses are consumed but
    not emitted.
    """

    _in_string: bool = False  # NOTE(review): currently unused by the logic below
    _in_comment: bool = False  # NOTE(review): currently unused by the logic below
    _token_tree: list[DocumentSymbol] = field(default_factory=list)
    _current: str = ""  # NOTE(review): currently unused by the logic below
    _line_indices: list[RawIndex] = field(default_factory=list)

    def _get_line(self, index: RawIndex) -> tuple[LineIndex, RawIndex]:
        """Map a raw character offset to ``(line, prev_newline_offset)``.

        ``prev_newline_offset`` is the raw offset of the newline that ended
        the previous line, so columns can be computed uniformly as
        ``index - prev_newline_offset - 1``.  For the first line there is no
        preceding newline, so -1 is returned as the sentinel.
        """
        for line, newline_pos in enumerate(self._line_indices):
            if index < newline_pos:
                # Fix: line 0 must report -1, not 0, otherwise every column
                # on the first line came out one too small.
                return line, self._line_indices[line - 1] if line > 0 else -1
        if not self._line_indices:
            # Fix: input without any newline used to raise IndexError on
            # self._line_indices[-1]; everything is on line 0.
            return 0, -1
        return len(self._line_indices), self._line_indices[-1]

    def _get_range(self, start: RawIndex, end: RawIndex) -> Range:
        """Convert an inclusive ``[start, end]`` offset pair to an LSP Range."""
        start_line, start_newline = self._get_line(start)
        start_col = start - start_newline - 1
        end_line, end_newline = self._get_line(end)
        end_col = end - end_newline - 1
        return Range(Position(start_line, start_col), Position(end_line, end_col))

    def _parse_string(self, raw: str, index: int) -> int:
        """Consume a double-quoted string starting at *index*.

        Emits a String symbol and returns the offset just past the closing
        quote.  Fix: an unterminated string now extends to end-of-input
        instead of raising ValueError from ``str.index``.
        """
        stop = raw.find('"', index + 1)
        if stop == -1:
            stop = len(raw) - 1
        self._token_tree.append(
            String(raw[index : stop + 1], self._get_range(index, stop))
        )
        return stop + 1

    def _parse_comment(self, raw: str, index: int) -> int:
        """Skip a ``;`` comment through the end of its line.

        Fix: a comment on the final line without a trailing newline used to
        raise ValueError from ``str.index``; it now consumes to end-of-input.
        """
        stop = raw.find("\n", index)
        if stop == -1:
            return len(raw)
        # TODO: emit a Comment symbol here once a DocumentSymbol kind is chosen.
        return stop + 1

    def _parse_whitespace(self, raw: str, index: int) -> int:
        """Skip a run of whitespace / parens / brackets / "'(" openers."""
        if m := WHITESPACE_OR_PAREN.search(raw, index):
            stop = m.end()
        else:
            stop = index + 1
        return stop

    def _parse_operator(self, raw: str, index: int) -> int:
        """Consume one operator and emit an Operator symbol.

        Fix: the original returned ``stop + 1``, silently swallowing the
        character after every operator (e.g. the ``b`` in ``a+b``); all the
        sibling ``_parse_*`` helpers return ``stop``.
        """
        if m := OPERATORS.search(raw, index):
            stop = m.end()
        else:
            stop = index + 1
        self._token_tree.append(
            Operator(raw[index:stop], self._get_range(index, stop - 1))
        )
        return stop

    def _parse_token(self, raw: str, index: int) -> int:
        """Consume an identifier token and emit a Variable symbol."""
        if m := TOKEN_REGEX.search(raw, index):
            stop = m.end()
        else:
            stop = index + 1
        self._token_tree.append(
            Token(raw[index:stop], self._get_range(index, stop - 1))
        )
        return stop

    def _parse_number(self, raw: str, index: int) -> int:
        """Consume a numeric literal and emit a Number symbol."""
        if m := NUMBER_REGEX.search(raw, index):
            stop = m.end()
        else:
            stop = index + 1
        self._token_tree.append(
            Number(raw[index:stop], self._get_range(index, stop - 1))
        )
        return stop

    def prepare_content(self, raw: str) -> None:
        """Tokenize *raw*, populating ``_line_indices`` and ``_token_tree``.

        Dispatches on the current character: strings, comments,
        whitespace/parens, operators, numbers, then bare tokens.
        """
        self._line_indices = [i for i, char in enumerate(raw) if char == "\n"]
        max_index = len(raw)
        index = 0
        while index < max_index:
            if raw[index] == '"':
                index = self._parse_string(raw, index)
            elif raw[index] == ";":
                index = self._parse_comment(raw, index)
            # Two-char slice so the "'(" opener is also recognized here.
            elif WHITESPACE_OR_PAREN.match(raw[index : index + 2]):
                index = self._parse_whitespace(raw, index)
            elif OPERATORS.match(raw[index]):
                index = self._parse_operator(raw, index)
            elif NUMBER_REGEX.match(raw[index]):
                index = self._parse_number(raw, index)
            else:
                index = self._parse_token(raw, index)
@dataclass() @dataclass()
class IterativeParser: class IterativeParser:
_stack: list[StackElement] = field(default_factory=list) _stack: list[StackElement] = field(default_factory=list)
@ -129,5 +289,8 @@ class IterativeParser:
if __name__ == "__main__": if __name__ == "__main__":
p = IterativeParser() example = Path(__file__).parent.parent.parent / "examples" / "example.il"
print(p(["((([]]))"]))
t = TokenParser()
t.prepare_content(example.read_text())
print(t._token_tree)

View File

@ -1,116 +1 @@
from collections.abc import Iterable, Iterator
from typing import Any, Sequence
from lsprotocol.types import DocumentSymbol, Range, SymbolKind
from parsimonious import ParseError
from dataclasses import dataclass
from parsimonious.nodes import Node, NodeVisitor
from .location import Locator
@dataclass(frozen=True)
class BaseToken:
    """Common base for parsed tokens; carries the source location."""

    range: Range  # document span this token covers
@dataclass(frozen=True)
class Literal(BaseToken):
    """A constant value: t/nil booleans, numbers, or string literals."""

    value: str | float | bool
@dataclass(frozen=True)
class Token(BaseToken):
    """A bare identifier token."""

    value: str
@dataclass(frozen=True)
class List(BaseToken):
    """A parenthesised group of child tokens."""

    value: list[BaseToken]
def flatten(xs: Iterable) -> Iterator[Any]:
    """Yield every leaf element of *xs*, recursing into nested iterables.

    Strings and bytes are treated as leaves rather than iterables.
    """
    for item in xs:
        if isinstance(item, (str, bytes)) or not isinstance(item, Iterable):
            yield item
        else:
            yield from flatten(item)
@dataclass
class SkillVisitor(NodeVisitor):
locator: Locator
def visit_skill(self, _: Node, visited_children: Sequence[Any]) -> list[BaseToken]:
return list(flatten(visited_children))
def visit_TOKEN(self, node: Node, _: Any) -> DocumentSymbol:
r = self.locator.locate(node)
print(r)
return DocumentSymbol(node.text, SymbolKind.Property, r, r)
def visit_LITERAL(self, node: Node, visited_children: list[None | Node]) -> Literal:
value, *_ = visited_children
if value:
match value.expr_name:
case "L_t":
return Literal(self.locator.locate(node), True)
case "L_nil":
return Literal(self.locator.locate(node), False)
case "L_num":
return Literal(self.locator.locate(node), float(value.text))
case "L_string":
return Literal(self.locator.locate(node), value.text)
case _:
pass
raise ParseError("something went wrong during literal parsing")
def visit_listraw(
self, node: Node, visited_children: list[list[list[Any]]]
) -> List:
rest = visited_children[2]
children = []
for child in rest:
for part in child:
if isinstance(part, BaseToken):
children.append(part)
return List(self.locator.locate(node), children)
def visit_listc(self, node: Node, visited_children: list[list[list[Any]]]) -> List:
rest = ([[visited_children[0]]], visited_children[2])
children = []
for child_list in rest:
for child in child_list:
for part in child:
if isinstance(part, BaseToken):
children.append(part)
return List(self.locator.locate(node), children)
def visit_listskill(
self, node: Node, visited_children: list[list[list[Any]]]
) -> List:
rest = visited_children[1]
children = []
for child in rest:
for part in child:
if isinstance(part, BaseToken):
children.append(part)
return List(self.locator.locate(node), children)
def visit_inline_assign(self, node: Node, visited_children: Sequence[Any]):
return visited_children or node
def generic_visit(
self, node: Node, visited_children: Sequence[Any]
) -> Node | Sequence[None | Node]:
return visited_children or node