complete simplest token parser and add document symbol lookup
commit 1d5d744f64
parent 28e137988f
@@ -7,6 +7,14 @@
             "request": "launch",
             "program": "/home/patrick/git/skill-ls/.venv/bin/skillls",
             "python": "/home/patrick/git/skill-ls/.venv/bin/python"
+        },
+
+        {
+            "name": "main",
+            "type": "python",
+            "request": "launch",
+            "module": "skillls.parsing.iterative",
+            "python": "/home/patrick/git/skill-ls/.venv/bin/python"
         }
     ]
 }
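Note: the added "main" configuration is a module launch, so the `if __name__ == "__main__":` block of `skillls.parsing.iterative` (extended later in this commit) runs under the debugger. A module launch behaves roughly like `python -m`; a minimal stand-alone sketch, assuming the package is installed in the venv:

```python
# Roughly what the "module" launch config does (sketch, not part of the commit):
# run the package module as __main__, like `python -m skillls.parsing.iterative`.
import runpy

runpy.run_module("skillls.parsing.iterative", run_name="__main__")
```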
@@ -3,12 +3,13 @@ example = nil
 example2 = example
 
 (
-example[qdqoifq]
 (let (some vars (default 0))
 ; ... some wall of text
 "))"
 
 wqdqwf = '(doqwf)
+var = 1.3
+var = 231
 qqvwv
 )
 )
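Note: the example file deliberately keeps `"))"` inside a string literal and a `;` comment line, i.e. unbalanced-looking parens that a tokenizer must not treat as structure. A quick hedged check against the `TokenParser` added below (assuming the package is importable; the sample input and expected output are illustrative, not taken from the commit):

```python
from skillls.parsing.iterative import TokenParser

t = TokenParser()
t.prepare_content('x = "))"\n')
# The parens are consumed as part of the string token, not as brackets:
print([s.name for s in t._token_tree])  # expected: ['x', '=', '"))"']
```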
@@ -1,35 +1,21 @@
 from logging import INFO, basicConfig, getLogger
-from pathlib import Path
 from time import time
 from lsprotocol.types import (
-    TEXT_DOCUMENT_DIAGNOSTIC,
-    TEXT_DOCUMENT_DID_CHANGE,
     TEXT_DOCUMENT_DID_OPEN,
     TEXT_DOCUMENT_DID_SAVE,
     TEXT_DOCUMENT_DOCUMENT_SYMBOL,
-    TEXT_DOCUMENT_PUBLISH_DIAGNOSTICS,
     CompletionItem,
-    Diagnostic,
-    DiagnosticSeverity,
-    DidChangeTextDocumentParams,
     DidOpenTextDocumentParams,
     DidSaveTextDocumentParams,
-    DocumentDiagnosticParams,
-    DocumentDiagnosticReport,
     DocumentSymbol,
     DocumentSymbolParams,
-    Position,
-    Range,
-    RelatedFullDocumentDiagnosticReport,
 )
 
 from pygls.server import LanguageServer
-from parsimonious import Grammar, IncompleteParseError
 
-from skillls.parsing.iterative import IterativeParser
+from skillls.parsing.iterative import IterativeParser, TokenParser
 
 from .cache import Cache
-from .parsing.tokenize import Locator, SkillVisitor
 
 
 URI = str
@@ -41,52 +27,14 @@ logger = getLogger(__name__)
 server = LanguageServer("skillls", "v0.1")
 
 
-def parse(content: str):
-    path = Path(__file__).parent / "grammar.peg"
-    grammar = Grammar(path.read_text())
-
-    locator = Locator(content.split())
-    tree = grammar.parse(content)
-
-    iv = SkillVisitor(locator)
-    output = iv.visit(tree)
-
-    return output
-
-
-# @server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
-# def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]:
-#     logger.warning("requested document symbols for %s", params.text_document.uri)
-#     doc = server.workspace.get_text_document(params.text_document.uri)
-#     try:
-#         logger.warning(parse(doc.source))
-#     except IncompleteParseError as e:
-#         server.publish_diagnostics(
-#             params.text_document.uri,
-#             [
-#                 Diagnostic(
-#                     Range(
-#                         Position(e.line() - 1, e.column() - 1),
-#                         Position(len(doc.lines), 0),
-#                     ),
-#                     str(e),
-#                     severity=DiagnosticSeverity.Error,
-#                 )
-#             ],
-#         )
-#     return []
-
-
-# @server.feature(TEXT_DOCUMENT_DIAGNOSTIC)
-# def document_diagnostic(params: DocumentDiagnosticParams) -> DocumentDiagnosticReport:
-#     doc = server.workspace.get_text_document(params.text_document.uri)
-#     p = IterativeParser()
-#     # parsed = (e for e in parse(doc.source) if isinstance(e, DocumentSymbol))
-#     # diags = [
-#     #     Diagnostic(e.range, e.name, severity=DiagnosticSeverity.Error) for e in parsed
-#     # ]
-#     diags = p(doc.lines)
-#     return RelatedFullDocumentDiagnosticReport(diags)
+@server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
+def on_hover(params: DocumentSymbolParams) -> list[DocumentSymbol]:
+    server.workspace.remove_text_document(params.text_document.uri)
+    doc = server.workspace.get_text_document(params.text_document.uri)
+    t = TokenParser()
+    t.prepare_content(doc.source)
+
+    return t._token_tree
 
 
 @server.feature(TEXT_DOCUMENT_DID_OPEN)
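Note: despite its `on_hover` name, this is the document-symbol handler; it evicts and re-fetches the document on every request, then returns `TokenParser`'s flat `_token_tree`. Each entry is a plain `DocumentSymbol`, whose required fields are `name`, `kind`, `range`, and `selection_range`; that is why the token constructors added later in this commit always set `selection_range=range`. A minimal sketch of that shape (values are illustrative assumptions):

```python
from lsprotocol.types import DocumentSymbol, Position, Range, SymbolKind

# The four required fields of an LSP DocumentSymbol, as the handler returns them:
r = Range(Position(0, 0), Position(0, 6))
sym = DocumentSymbol(name="wqdqwf", kind=SymbolKind.Variable, range=r, selection_range=r)
```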
@@ -109,8 +57,3 @@ def on_save(params: DidSaveTextDocumentParams) -> None:
 
 def main():
     server.start_io()
-
-
-if __name__ == "__main__":
-    file = Path(__file__).parent.parent / "examples" / "example.il"
-    out = parse(file.read_text())
@@ -1,9 +1,19 @@
+from abc import ABC
 from dataclasses import dataclass, field
-from enum import Enum, auto
+from enum import Enum
 from logging import getLogger
+import re
+from pathlib import Path
 from typing import NamedTuple, Self
 
-from lsprotocol.types import Diagnostic, DiagnosticSeverity, Position, Range
+from lsprotocol.types import (
+    Diagnostic,
+    DiagnosticSeverity,
+    DocumentSymbol,
+    Position,
+    Range,
+    SymbolKind,
+)
 
 logger = getLogger(__name__)
 
@@ -51,6 +61,156 @@ class StackElement(NamedTuple):
     elem: SyntaxPair
 
 
+WHITESPACE_OR_PAREN = re.compile(r"(\s|\(|\)|\[|\]|\'\()+")
+TOKEN_REGEX = re.compile(r"\w[a-zA-Z0-9_]*")
+NUMBER_REGEX = re.compile(r"\d+(\.\d+)?")
+OPERATORS = re.compile(r"(->|~>|\+|\-|\*|\/|\=)")
+
+
+@dataclass
+class TreeToken(ABC):
+    content: str
+    range: Range
+
+
+def String(content: str, range: Range) -> DocumentSymbol:
+    return DocumentSymbol(
+        name=content,
+        range=range,
+        kind=SymbolKind.String,
+        selection_range=range,
+    )
+
+
+def Operator(content: str, range: Range) -> DocumentSymbol:
+    return DocumentSymbol(
+        name=content,
+        range=range,
+        kind=SymbolKind.Operator,
+        selection_range=range,
+    )
+
+
+def Number(content: str, range: Range) -> DocumentSymbol:
+    return DocumentSymbol(
+        name=content,
+        range=range,
+        kind=SymbolKind.Number,
+        selection_range=range,
+    )
+
+
+def Token(content: str, range: Range) -> DocumentSymbol:
+    return DocumentSymbol(
+        name=content,
+        range=range,
+        kind=SymbolKind.Variable,
+        selection_range=range,
+    )
+
+
+RawIndex = int
+ColIndex = int
+LineIndex = int
+
+
+@dataclass
+class TokenParser:
+    _in_string: bool = False
+    _in_comment: bool = False
+    _token_tree: list[DocumentSymbol] = field(default_factory=list)
+    _current: str = ""
+    _line_indices: list[RawIndex] = field(default_factory=list)
+
+    def _get_line(self, index: RawIndex) -> tuple[LineIndex, RawIndex]:
+        for line, newline_pos in enumerate(self._line_indices):
+            if index < newline_pos:
+                return line, self._line_indices[line - 1] if line > 0 else 0
+
+        return len(self._line_indices), self._line_indices[-1]
+
+    def _get_range(self, start: RawIndex, end: RawIndex) -> Range:
+        start_line, start_line_index = self._get_line(start)
+        start_col = start - start_line_index - 1
+
+        end_line, end_line_index = self._get_line(end)
+        end_col = end - end_line_index - 1
+
+        return Range(Position(start_line, start_col), Position(end_line, end_col))
+
+    def _parse_string(self, raw: str, index: int) -> int:
+        stop = raw.index('"', index + 1)
+        self._token_tree.append(
+            String(raw[index : stop + 1], self._get_range(index, stop))
+        )
+        return stop + 1
+
+    def _parse_comment(self, raw: str, index: int) -> int:
+        stop = raw.index("\n", index)
+        # self._token_tree.append(Comment(raw[index:stop], self._get_range(index, stop)))
+        return stop + 1
+
+    def _parse_whitespace(self, raw: str, index: int) -> int:
+        if m := WHITESPACE_OR_PAREN.search(raw, index):
+            stop = m.end()
+        else:
+            stop = index + 1
+
+        # self._token_tree.append(Whitespace(raw[index:stop]))
+        return stop
+
+    def _parse_operator(self, raw: str, index: int) -> int:
+        if m := OPERATORS.search(raw, index):
+            stop = m.end()
+        else:
+            stop = index + 1
+
+        self._token_tree.append(
+            Operator(raw[index:stop], self._get_range(index, stop - 1))
+        )
+        return stop + 1
+
+    def _parse_token(self, raw: str, index: int) -> int:
+        if m := TOKEN_REGEX.search(raw, index):
+            stop = m.end()
+        else:
+            stop = index + 1
+
+        self._token_tree.append(
+            Token(raw[index:stop], self._get_range(index, stop - 1))
+        )
+        return stop
+
+    def _parse_number(self, raw: str, index: int) -> int:
+        if m := NUMBER_REGEX.search(raw, index):
+            stop = m.end()
+        else:
+            stop = index + 1
+
+        self._token_tree.append(
+            Number(raw[index:stop], self._get_range(index, stop - 1))
+        )
+        return stop
+
+    def prepare_content(self, raw: str) -> None:
+        self._line_indices = [i for i, char in enumerate(raw) if char == "\n"]
+        max_index = len(raw)
+        index = 0
+        while index < max_index:
+            if raw[index] == '"':
+                index = self._parse_string(raw, index)
+            elif raw[index] == ";":
+                index = self._parse_comment(raw, index)
+            elif WHITESPACE_OR_PAREN.match(raw[index : index + 2]):
+                index = self._parse_whitespace(raw, index)
+            elif OPERATORS.match(raw[index]):
+                index = self._parse_operator(raw, index)
+            elif NUMBER_REGEX.match(raw[index]):
+                index = self._parse_number(raw, index)
+            else:
+                index = self._parse_token(raw, index)
+
+
 @dataclass()
 class IterativeParser:
     _stack: list[StackElement] = field(default_factory=list)
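Note: `TokenParser` maps raw character offsets back to LSP line/column positions through `_line_indices`, the offsets of every `\n` in the source. A worked sketch of that mapping on hypothetical input (the helper below mirrors `_get_line`/`_get_range` but is not part of the commit):

```python
# Offset -> (line, col) sketch mirroring TokenParser._get_line/_get_range.
raw = "abc\ndef\n"
line_indices = [i for i, c in enumerate(raw) if c == "\n"]  # [3, 7]

def get_line(index: int) -> tuple[int, int]:
    for line, newline_pos in enumerate(line_indices):
        if index < newline_pos:
            # line start = offset of the previous newline (0 for the first line)
            return line, line_indices[line - 1] if line > 0 else 0
    return len(line_indices), line_indices[-1]

line, line_start = get_line(5)   # raw[5] == "e" -> (1, 3)
col = 5 - line_start - 1         # 1: "e" is column 1 of line 1
# On line 0 the same formula gives raw[1] == "b" -> col 0 instead of 1,
# since no preceding "\n" absorbs the "- 1"; possibly an off-by-one.
```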
@@ -129,5 +289,8 @@ class IterativeParser:
 
 
 if __name__ == "__main__":
-    p = IterativeParser()
-    print(p(["((([]]))"]))
+    example = Path(__file__).parent.parent.parent / "examples" / "example.il"
+
+    t = TokenParser()
+    t.prepare_content(example.read_text())
+    print(t._token_tree)
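Note: two edges a reviewer might flag in the parser above: `_parse_string` finds the closing quote with `str.index`, which raises `ValueError` when a string literal is never closed, and `_parse_operator` returns `stop + 1`, skipping the character right after an operator. A minimal, hypothetical reproduction of the first point (assuming the package is importable):

```python
from skillls.parsing.iterative import TokenParser

t = TokenParser()
try:
    t.prepare_content('x = "unterminated\n')
except ValueError as e:
    # str.index raises because no closing '"' exists after the opener
    print("unterminated string:", e)
```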
@@ -1,116 +0,0 @@
-from collections.abc import Iterable, Iterator
-from typing import Any, Sequence
-from lsprotocol.types import DocumentSymbol, Range, SymbolKind
-from parsimonious import ParseError
-from dataclasses import dataclass
-
-from parsimonious.nodes import Node, NodeVisitor
-
-from .location import Locator
-
-
-@dataclass(frozen=True)
-class BaseToken:
-    range: Range
-
-
-@dataclass(frozen=True)
-class Literal(BaseToken):
-    value: str | float | bool
-
-
-@dataclass(frozen=True)
-class Token(BaseToken):
-    value: str
-
-
-@dataclass(frozen=True)
-class List(BaseToken):
-    value: list[BaseToken]
-
-
-def flatten(xs: Iterable) -> Iterator[Any]:
-    for x in xs:
-        if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
-            yield from flatten(x)
-        else:
-            yield x
-
-
-@dataclass
-class SkillVisitor(NodeVisitor):
-    locator: Locator
-
-    def visit_skill(self, _: Node, visited_children: Sequence[Any]) -> list[BaseToken]:
-        return list(flatten(visited_children))
-
-    def visit_TOKEN(self, node: Node, _: Any) -> DocumentSymbol:
-        r = self.locator.locate(node)
-        print(r)
-        return DocumentSymbol(node.text, SymbolKind.Property, r, r)
-
-    def visit_LITERAL(self, node: Node, visited_children: list[None | Node]) -> Literal:
-        value, *_ = visited_children
-        if value:
-            match value.expr_name:
-                case "L_t":
-                    return Literal(self.locator.locate(node), True)
-                case "L_nil":
-                    return Literal(self.locator.locate(node), False)
-                case "L_num":
-                    return Literal(self.locator.locate(node), float(value.text))
-                case "L_string":
-                    return Literal(self.locator.locate(node), value.text)
-                case _:
-                    pass
-
-        raise ParseError("something went wrong during literal parsing")
-
-    def visit_listraw(
-        self, node: Node, visited_children: list[list[list[Any]]]
-    ) -> List:
-        rest = visited_children[2]
-
-        children = []
-
-        for child in rest:
-            for part in child:
-                if isinstance(part, BaseToken):
-                    children.append(part)
-
-        return List(self.locator.locate(node), children)
-
-    def visit_listc(self, node: Node, visited_children: list[list[list[Any]]]) -> List:
-        rest = ([[visited_children[0]]], visited_children[2])
-
-        children = []
-
-        for child_list in rest:
-            for child in child_list:
-                for part in child:
-                    if isinstance(part, BaseToken):
-                        children.append(part)
-
-        return List(self.locator.locate(node), children)
-
-    def visit_listskill(
-        self, node: Node, visited_children: list[list[list[Any]]]
-    ) -> List:
-        rest = visited_children[1]
-
-        children = []
-
-        for child in rest:
-            for part in child:
-                if isinstance(part, BaseToken):
-                    children.append(part)
-
-        return List(self.locator.locate(node), children)
-
-    def visit_inline_assign(self, node: Node, visited_children: Sequence[Any]):
-        return visited_children or node
-
-    def generic_visit(
-        self, node: Node, visited_children: Sequence[Any]
-    ) -> Node | Sequence[None | Node]:
-        return visited_children or node