replace peg parser with iterative approach
This commit is contained in:
parent
a924ca5710
commit
9e63218572
|
@ -1,3 +1,14 @@
|
||||||
example = nil
|
|
||||||
|
|
||||||
|
example = nil
|
||||||
example2 = example
|
example2 = example
|
||||||
|
|
||||||
|
(
|
||||||
|
example[qdqoifq]
|
||||||
|
(let (some vars (default 0))
|
||||||
|
; ... some wall of text
|
||||||
|
"))"
|
||||||
|
|
||||||
|
wqdqwf = '(doqwf)
|
||||||
|
qqvwv
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
|
@ -1,19 +1,33 @@
|
||||||
from logging import INFO, basicConfig, getLogger
|
from logging import INFO, basicConfig, getLogger
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from time import time
|
||||||
from lsprotocol.types import (
|
from lsprotocol.types import (
|
||||||
|
TEXT_DOCUMENT_DIAGNOSTIC,
|
||||||
|
TEXT_DOCUMENT_DID_CHANGE,
|
||||||
|
TEXT_DOCUMENT_DID_OPEN,
|
||||||
|
TEXT_DOCUMENT_DID_SAVE,
|
||||||
TEXT_DOCUMENT_DOCUMENT_SYMBOL,
|
TEXT_DOCUMENT_DOCUMENT_SYMBOL,
|
||||||
|
TEXT_DOCUMENT_PUBLISH_DIAGNOSTICS,
|
||||||
CompletionItem,
|
CompletionItem,
|
||||||
Diagnostic,
|
Diagnostic,
|
||||||
DiagnosticSeverity,
|
DiagnosticSeverity,
|
||||||
|
DidChangeTextDocumentParams,
|
||||||
|
DidOpenTextDocumentParams,
|
||||||
|
DidSaveTextDocumentParams,
|
||||||
|
DocumentDiagnosticParams,
|
||||||
|
DocumentDiagnosticReport,
|
||||||
DocumentSymbol,
|
DocumentSymbol,
|
||||||
DocumentSymbolParams,
|
DocumentSymbolParams,
|
||||||
Position,
|
Position,
|
||||||
Range,
|
Range,
|
||||||
|
RelatedFullDocumentDiagnosticReport,
|
||||||
)
|
)
|
||||||
|
|
||||||
from pygls.server import LanguageServer
|
from pygls.server import LanguageServer
|
||||||
from parsimonious import Grammar, IncompleteParseError
|
from parsimonious import Grammar, IncompleteParseError
|
||||||
|
|
||||||
|
from skillls.parsing.iterative import IterativeParser
|
||||||
|
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .parsing.tokenize import Locator, SkillVisitor
|
from .parsing.tokenize import Locator, SkillVisitor
|
||||||
|
|
||||||
|
@ -31,7 +45,7 @@ def parse(content: str):
|
||||||
path = Path(__file__).parent / "grammar.peg"
|
path = Path(__file__).parent / "grammar.peg"
|
||||||
grammar = Grammar(path.read_text())
|
grammar = Grammar(path.read_text())
|
||||||
|
|
||||||
locator = Locator(content)
|
locator = Locator(content.split())
|
||||||
tree = grammar.parse(content)
|
tree = grammar.parse(content)
|
||||||
|
|
||||||
iv = SkillVisitor(locator)
|
iv = SkillVisitor(locator)
|
||||||
|
@ -40,27 +54,57 @@ def parse(content: str):
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
@server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
|
# @server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
|
||||||
def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]:
|
# def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]:
|
||||||
logger.warning("requested document symbols for %s", params.text_document.uri)
|
# logger.warning("requested document symbols for %s", params.text_document.uri)
|
||||||
|
# doc = server.workspace.get_text_document(params.text_document.uri)
|
||||||
|
# try:
|
||||||
|
# logger.warning(parse(doc.source))
|
||||||
|
# except IncompleteParseError as e:
|
||||||
|
# server.publish_diagnostics(
|
||||||
|
# params.text_document.uri,
|
||||||
|
# [
|
||||||
|
# Diagnostic(
|
||||||
|
# Range(
|
||||||
|
# Position(e.line() - 1, e.column() - 1),
|
||||||
|
# Position(len(doc.lines), 0),
|
||||||
|
# ),
|
||||||
|
# str(e),
|
||||||
|
# severity=DiagnosticSeverity.Error,
|
||||||
|
# )
|
||||||
|
# ],
|
||||||
|
# )
|
||||||
|
# return []
|
||||||
|
|
||||||
|
|
||||||
|
# @server.feature(TEXT_DOCUMENT_DIAGNOSTIC)
|
||||||
|
# def document_diagnostic(params: DocumentDiagnosticParams) -> DocumentDiagnosticReport:
|
||||||
|
# doc = server.workspace.get_text_document(params.text_document.uri)
|
||||||
|
# p = IterativeParser()
|
||||||
|
# # parsed = (e for e in parse(doc.source) if isinstance(e, DocumentSymbol))
|
||||||
|
# # diags = [
|
||||||
|
# # Diagnostic(e.range, e.name, severity=DiagnosticSeverity.Error) for e in parsed
|
||||||
|
# # ]
|
||||||
|
# diags = p(doc.lines)
|
||||||
|
# return RelatedFullDocumentDiagnosticReport(diags)
|
||||||
|
|
||||||
|
|
||||||
|
@server.feature(TEXT_DOCUMENT_DID_OPEN)
|
||||||
|
def on_open(params: DidOpenTextDocumentParams) -> None:
|
||||||
doc = server.workspace.get_text_document(params.text_document.uri)
|
doc = server.workspace.get_text_document(params.text_document.uri)
|
||||||
try:
|
p = IterativeParser()
|
||||||
logger.warning(parse(doc.source))
|
diags = p(doc.lines)
|
||||||
except IncompleteParseError as e:
|
server.publish_diagnostics(params.text_document.uri, diags, version=int(time()))
|
||||||
server.publish_diagnostics(
|
|
||||||
params.text_document.uri,
|
|
||||||
[
|
@server.feature(TEXT_DOCUMENT_DID_SAVE)
|
||||||
Diagnostic(
|
def on_save(params: DidSaveTextDocumentParams) -> None:
|
||||||
Range(
|
server.workspace.remove_text_document(params.text_document.uri)
|
||||||
Position(e.line() - 1, e.column() - 1),
|
doc = server.workspace.get_text_document(params.text_document.uri)
|
||||||
Position(len(doc.lines), 0),
|
p = IterativeParser()
|
||||||
),
|
diags = p(doc.lines)
|
||||||
str(e),
|
logger.warning(doc.source)
|
||||||
severity=DiagnosticSeverity.Error,
|
server.publish_diagnostics(params.text_document.uri, diags, version=int(time()))
|
||||||
)
|
|
||||||
],
|
|
||||||
)
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -69,4 +113,4 @@ def main():
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
file = Path(__file__).parent.parent / "examples" / "example.il"
|
file = Path(__file__).parent.parent / "examples" / "example.il"
|
||||||
parse(file.read_text())
|
out = parse(file.read_text())
|
||||||
|
|
|
@ -0,0 +1,133 @@
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum, auto
|
||||||
|
from logging import getLogger
|
||||||
|
from typing import NamedTuple, Self
|
||||||
|
|
||||||
|
from lsprotocol.types import Diagnostic, DiagnosticSeverity, Position, Range
|
||||||
|
|
||||||
|
logger = getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Pair(NamedTuple):
    """The opening and closing character of one bracket kind."""

    # Character that opens the bracket, e.g. "(".
    start: str
    # Character that closes the bracket, e.g. ")".
    end: str
|
||||||
|
|
||||||
|
|
||||||
|
class SyntaxPair(Enum):
    """Bracket kinds whose opening and closing characters are tracked."""

    Paren = Pair("(", ")")
    Square = Pair("[", "]")

    @classmethod
    def by_start_elem(cls, start: str) -> Self:
        """Return the member whose opening character equals ``start``.

        Raises:
            ValueError: if no member opens with ``start``.
        """
        found = next((member for member in cls if member.value.start == start), None)
        if found is None:
            raise ValueError(f"`{start}` not a valid start character")
        return found

    @classmethod
    def by_end_elem(cls, end: str) -> Self:
        """Return the member whose closing character equals ``end``.

        Raises:
            ValueError: if no member closes with ``end``.
        """
        found = next((member for member in cls if member.value.end == end), None)
        if found is None:
            raise ValueError(f"`{end}` not a valid end character")
        return found
|
||||||
|
|
||||||
|
|
||||||
|
def char_range(line: int, char: int) -> Range:
    """Build the range that spans exactly one character.

    The range starts at column ``char`` of ``line`` and ends one column later
    on the same line.
    """
    begin = Position(line, char)
    finish = Position(line, char + 1)
    return Range(begin, finish)
|
||||||
|
|
||||||
|
|
||||||
|
def pair_mismatch(line: int, char: int, msg: str) -> Diagnostic:
    """Create an error-severity diagnostic on the single character at (line, char).

    ``msg`` is used verbatim as the diagnostic message.
    """
    location = char_range(line, char)
    return Diagnostic(location, msg, severity=DiagnosticSeverity.Error)
|
||||||
|
|
||||||
|
|
||||||
|
class StackElement(NamedTuple):
    """An opening bracket recorded by the parser until its closer is seen."""

    # Location of the opening character in the document.
    range: Range
    # Which bracket kind was opened.
    elem: SyntaxPair
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass()
|
||||||
|
class IterativeParser:
|
||||||
|
_stack: list[StackElement] = field(default_factory=list)
|
||||||
|
|
||||||
|
def peek(self) -> StackElement:
|
||||||
|
return self._stack[-1]
|
||||||
|
|
||||||
|
def pop(self) -> StackElement:
|
||||||
|
return self._stack.pop()
|
||||||
|
|
||||||
|
def push(self, pair: StackElement) -> None:
|
||||||
|
return self._stack.append(pair)
|
||||||
|
|
||||||
|
def __call__(self, raw: list[str]) -> list[Diagnostic]:
|
||||||
|
in_string = False
|
||||||
|
errs = []
|
||||||
|
for line, raw_line in enumerate(raw):
|
||||||
|
for char, raw_char in enumerate(raw_line):
|
||||||
|
match raw_char:
|
||||||
|
case ";":
|
||||||
|
if not in_string:
|
||||||
|
break
|
||||||
|
case '"':
|
||||||
|
in_string = not in_string
|
||||||
|
case "(" | "[":
|
||||||
|
if not in_string:
|
||||||
|
self.push(
|
||||||
|
StackElement(
|
||||||
|
char_range(line, char),
|
||||||
|
SyntaxPair.by_start_elem(raw_char),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
case "]" | ")":
|
||||||
|
if not in_string:
|
||||||
|
if not self._stack:
|
||||||
|
errs.append(
|
||||||
|
pair_mismatch(
|
||||||
|
line, char, f"one {raw_char} too much"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
expected = SyntaxPair.by_end_elem(raw_char)
|
||||||
|
elem = self._stack.pop()
|
||||||
|
if elem.elem == expected:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if self._stack and self._stack[-1].elem == expected:
|
||||||
|
errs.append(
|
||||||
|
pair_mismatch(
|
||||||
|
line, char, f"unclosed {elem.elem.value.start}"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self._stack.pop()
|
||||||
|
self._stack.append(elem)
|
||||||
|
else:
|
||||||
|
errs.append(
|
||||||
|
pair_mismatch(
|
||||||
|
line, char, f"one {raw_char} too much"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self._stack.append(elem)
|
||||||
|
|
||||||
|
for rest in self._stack:
|
||||||
|
errs.append(
|
||||||
|
Diagnostic(
|
||||||
|
rest.range,
|
||||||
|
f"unclosed {rest.elem.value.start}",
|
||||||
|
severity=DiagnosticSeverity.Error,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
self._stack = []
|
||||||
|
|
||||||
|
return errs
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: run the checker on one line with a surplus "]".
    checker = IterativeParser()
    result = checker(["((([]]))"])
    print(result)
|
|
@ -14,13 +14,14 @@ class Locator:
|
||||||
counter = 0
|
counter = 0
|
||||||
line = 0
|
line = 0
|
||||||
for ix, raw_line in enumerate(self.raw):
|
for ix, raw_line in enumerate(self.raw):
|
||||||
if counter + len(raw_line) > index:
|
if counter + len(raw_line) + 1 > index:
|
||||||
line = ix
|
line = ix
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
counter += len(raw_line)
|
counter += len(raw_line) + 1
|
||||||
|
|
||||||
return Position(line, index - counter)
|
print(counter, line)
|
||||||
|
return Position(line + 1, index - counter + 1)
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def locate(self, index: int) -> Position:
|
def locate(self, index: int) -> Position:
|
||||||
|
@ -34,6 +35,7 @@ class Locator:
|
||||||
if isinstance(index, int):
|
if isinstance(index, int):
|
||||||
return self._locate_pos(index)
|
return self._locate_pos(index)
|
||||||
|
|
||||||
|
print(index.start, index.end)
|
||||||
start = self._locate_pos(index.start)
|
start = self._locate_pos(index.start)
|
||||||
end = self._locate_pos(index.end)
|
end = self._locate_pos(index.end)
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
|
from collections.abc import Iterable, Iterator
|
||||||
from typing import Any, Sequence
|
from typing import Any, Sequence
|
||||||
from lsprotocol.types import Range
|
from lsprotocol.types import DocumentSymbol, Range, SymbolKind
|
||||||
from parsimonious import ParseError
|
from parsimonious import ParseError
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
@ -28,21 +29,25 @@ class List(BaseToken):
|
||||||
value: list[BaseToken]
|
value: list[BaseToken]
|
||||||
|
|
||||||
|
|
||||||
|
def flatten(xs: Iterable) -> Iterator[Any]:
    """Lazily yield the leaves of an arbitrarily nested iterable, in order.

    Every iterable element is descended into, except ``str`` and ``bytes``,
    which are yielded whole. An explicit stack of iterators is used instead
    of recursion so that deeply nested input cannot hit the interpreter's
    recursion limit.

    Args:
        xs: the (possibly nested) iterable to flatten.

    Yields:
        Each non-iterable element, plus each ``str``/``bytes`` element.
    """
    pending = [iter(xs)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
                # Descend now; the partially consumed parent iterator stays
                # on the stack and is resumed once this child is exhausted.
                pending.append(iter(item))
                break
            yield item
        else:
            # Innermost iterator is exhausted: resume its parent.
            pending.pop()
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class SkillVisitor(NodeVisitor):
|
class SkillVisitor(NodeVisitor):
|
||||||
locator: Locator
|
locator: Locator
|
||||||
|
|
||||||
def visit_skill(self, _: Node, visited_children: Sequence[Any]) -> list[BaseToken]:
|
def visit_skill(self, _: Node, visited_children: Sequence[Any]) -> list[BaseToken]:
|
||||||
children = []
|
return list(flatten(visited_children))
|
||||||
for childlist in visited_children:
|
|
||||||
for child in childlist:
|
|
||||||
if isinstance(child, BaseToken):
|
|
||||||
children.append(child)
|
|
||||||
|
|
||||||
return children
|
def visit_TOKEN(self, node: Node, _: Any) -> DocumentSymbol:
|
||||||
|
r = self.locator.locate(node)
|
||||||
def visit_TOKEN(self, node: Node, _: Any) -> Token:
|
print(r)
|
||||||
return Token(self.locator.locate(node), node.text)
|
return DocumentSymbol(node.text, SymbolKind.Property, r, r)
|
||||||
|
|
||||||
def visit_LITERAL(self, node: Node, visited_children: list[None | Node]) -> Literal:
|
def visit_LITERAL(self, node: Node, visited_children: list[None | Node]) -> Literal:
|
||||||
value, *_ = visited_children
|
value, *_ = visited_children
|
||||||
|
@ -103,7 +108,7 @@ class SkillVisitor(NodeVisitor):
|
||||||
return List(self.locator.locate(node), children)
|
return List(self.locator.locate(node), children)
|
||||||
|
|
||||||
def visit_inline_assign(self, node: Node, visited_children: Sequence[Any]):
|
def visit_inline_assign(self, node: Node, visited_children: Sequence[Any]):
|
||||||
print(node)
|
return visited_children or node
|
||||||
|
|
||||||
def generic_visit(
|
def generic_visit(
|
||||||
self, node: Node, visited_children: Sequence[Any]
|
self, node: Node, visited_children: Sequence[Any]
|
||||||
|
|
Loading…
Reference in New Issue