Replace PEG parser with iterative approach

This commit is contained in:
Patrick 2023-10-19 22:06:08 +02:00 committed by AcerecA
parent a924ca5710
commit 9e63218572
5 changed files with 232 additions and 37 deletions

View File

@ -1,3 +1,14 @@
example = nil
example = nil
example2 = example example2 = example
(
example[qdqoifq]
(let (some vars (default 0))
; ... some wall of text
"))"
wqdqwf = '(doqwf)
qqvwv
)
)

View File

@ -1,19 +1,33 @@
from logging import INFO, basicConfig, getLogger from logging import INFO, basicConfig, getLogger
from pathlib import Path from pathlib import Path
from time import time
from lsprotocol.types import ( from lsprotocol.types import (
TEXT_DOCUMENT_DIAGNOSTIC,
TEXT_DOCUMENT_DID_CHANGE,
TEXT_DOCUMENT_DID_OPEN,
TEXT_DOCUMENT_DID_SAVE,
TEXT_DOCUMENT_DOCUMENT_SYMBOL, TEXT_DOCUMENT_DOCUMENT_SYMBOL,
TEXT_DOCUMENT_PUBLISH_DIAGNOSTICS,
CompletionItem, CompletionItem,
Diagnostic, Diagnostic,
DiagnosticSeverity, DiagnosticSeverity,
DidChangeTextDocumentParams,
DidOpenTextDocumentParams,
DidSaveTextDocumentParams,
DocumentDiagnosticParams,
DocumentDiagnosticReport,
DocumentSymbol, DocumentSymbol,
DocumentSymbolParams, DocumentSymbolParams,
Position, Position,
Range, Range,
RelatedFullDocumentDiagnosticReport,
) )
from pygls.server import LanguageServer from pygls.server import LanguageServer
from parsimonious import Grammar, IncompleteParseError from parsimonious import Grammar, IncompleteParseError
from skillls.parsing.iterative import IterativeParser
from .cache import Cache from .cache import Cache
from .parsing.tokenize import Locator, SkillVisitor from .parsing.tokenize import Locator, SkillVisitor
@ -31,7 +45,7 @@ def parse(content: str):
path = Path(__file__).parent / "grammar.peg" path = Path(__file__).parent / "grammar.peg"
grammar = Grammar(path.read_text()) grammar = Grammar(path.read_text())
locator = Locator(content) locator = Locator(content.split())
tree = grammar.parse(content) tree = grammar.parse(content)
iv = SkillVisitor(locator) iv = SkillVisitor(locator)
@ -40,27 +54,57 @@ def parse(content: str):
return output return output
@server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL) # @server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]: # def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]:
logger.warning("requested document symbols for %s", params.text_document.uri) # logger.warning("requested document symbols for %s", params.text_document.uri)
# doc = server.workspace.get_text_document(params.text_document.uri)
# try:
# logger.warning(parse(doc.source))
# except IncompleteParseError as e:
# server.publish_diagnostics(
# params.text_document.uri,
# [
# Diagnostic(
# Range(
# Position(e.line() - 1, e.column() - 1),
# Position(len(doc.lines), 0),
# ),
# str(e),
# severity=DiagnosticSeverity.Error,
# )
# ],
# )
# return []
# @server.feature(TEXT_DOCUMENT_DIAGNOSTIC)
# def document_diagnostic(params: DocumentDiagnosticParams) -> DocumentDiagnosticReport:
# doc = server.workspace.get_text_document(params.text_document.uri)
# p = IterativeParser()
# # parsed = (e for e in parse(doc.source) if isinstance(e, DocumentSymbol))
# # diags = [
# # Diagnostic(e.range, e.name, severity=DiagnosticSeverity.Error) for e in parsed
# # ]
# diags = p(doc.lines)
# return RelatedFullDocumentDiagnosticReport(diags)
@server.feature(TEXT_DOCUMENT_DID_OPEN)
def on_open(params: DidOpenTextDocumentParams) -> None:
doc = server.workspace.get_text_document(params.text_document.uri) doc = server.workspace.get_text_document(params.text_document.uri)
try: p = IterativeParser()
logger.warning(parse(doc.source)) diags = p(doc.lines)
except IncompleteParseError as e: server.publish_diagnostics(params.text_document.uri, diags, version=int(time()))
server.publish_diagnostics(
params.text_document.uri,
[ @server.feature(TEXT_DOCUMENT_DID_SAVE)
Diagnostic( def on_save(params: DidSaveTextDocumentParams) -> None:
Range( server.workspace.remove_text_document(params.text_document.uri)
Position(e.line() - 1, e.column() - 1), doc = server.workspace.get_text_document(params.text_document.uri)
Position(len(doc.lines), 0), p = IterativeParser()
), diags = p(doc.lines)
str(e), logger.warning(doc.source)
severity=DiagnosticSeverity.Error, server.publish_diagnostics(params.text_document.uri, diags, version=int(time()))
)
],
)
return []
def main(): def main():
@ -69,4 +113,4 @@ def main():
if __name__ == "__main__": if __name__ == "__main__":
file = Path(__file__).parent.parent / "examples" / "example.il" file = Path(__file__).parent.parent / "examples" / "example.il"
parse(file.read_text()) out = parse(file.read_text())

View File

@ -0,0 +1,133 @@
from dataclasses import dataclass, field
from enum import Enum, auto
from logging import getLogger
from typing import NamedTuple, Self
from lsprotocol.types import Diagnostic, DiagnosticSeverity, Position, Range
logger = getLogger(__name__)
# An opening/closing delimiter pair, e.g. Pair("(", ")").
# `start` is the opening character, `end` the closing one.
Pair = NamedTuple("Pair", [("start", str), ("end", str)])
class SyntaxPair(Enum):
    """Bracket kinds recognised by the parser, one per delimiter pair."""

    Paren = Pair("(", ")")
    Square = Pair("[", "]")

    @classmethod
    def by_start_elem(cls, start: str) -> Self:
        """Return the member whose opening delimiter is *start*.

        Raises ValueError when no member opens with that character.
        """
        found = next((member for member in cls if member.value[0] == start), None)
        if found is None:
            raise ValueError(f"`{start}` not a valid start character")
        return found

    @classmethod
    def by_end_elem(cls, end: str) -> Self:
        """Return the member whose closing delimiter is *end*.

        Raises ValueError when no member closes with that character.
        """
        found = next((member for member in cls if member.value[1] == end), None)
        if found is None:
            raise ValueError(f"`{end}` not a valid end character")
        return found
def char_range(line: int, char: int) -> Range:
    """Return a one-character-wide Range at (*line*, *char*)."""
    begin = Position(line, char)
    finish = Position(line, char + 1)
    return Range(begin, finish)
def pair_mismatch(line: int, char: int, msg: str) -> Diagnostic:
    """Build an error-severity Diagnostic covering the single offending character."""
    location = char_range(line, char)
    return Diagnostic(location, msg, severity=DiagnosticSeverity.Error)
# Bookkeeping for a not-yet-matched opening delimiter: where it was seen
# (`range`) and which bracket kind it belongs to (`elem`).
StackElement = NamedTuple("StackElement", [("range", Range), ("elem", SyntaxPair)])
@dataclass()
class IterativeParser:
_stack: list[StackElement] = field(default_factory=list)
def peek(self) -> StackElement:
return self._stack[-1]
def pop(self) -> StackElement:
return self._stack.pop()
def push(self, pair: StackElement) -> None:
return self._stack.append(pair)
def __call__(self, raw: list[str]) -> list[Diagnostic]:
in_string = False
errs = []
for line, raw_line in enumerate(raw):
for char, raw_char in enumerate(raw_line):
match raw_char:
case ";":
if not in_string:
break
case '"':
in_string = not in_string
case "(" | "[":
if not in_string:
self.push(
StackElement(
char_range(line, char),
SyntaxPair.by_start_elem(raw_char),
)
)
case "]" | ")":
if not in_string:
if not self._stack:
errs.append(
pair_mismatch(
line, char, f"one {raw_char} too much"
)
)
continue
expected = SyntaxPair.by_end_elem(raw_char)
elem = self._stack.pop()
if elem.elem == expected:
continue
if self._stack and self._stack[-1].elem == expected:
errs.append(
pair_mismatch(
line, char, f"unclosed {elem.elem.value.start}"
)
)
self._stack.pop()
self._stack.append(elem)
else:
errs.append(
pair_mismatch(
line, char, f"one {raw_char} too much"
)
)
self._stack.append(elem)
for rest in self._stack:
errs.append(
Diagnostic(
rest.range,
f"unclosed {rest.elem.value.start}",
severity=DiagnosticSeverity.Error,
)
)
self._stack = []
return errs
if __name__ == "__main__":
    # Ad-hoc smoke run: expect a surplus "]" plus an unclosed "(".
    parser = IterativeParser()
    diagnostics = parser(["((([]]))"])
    print(diagnostics)

View File

@ -14,13 +14,14 @@ class Locator:
counter = 0 counter = 0
line = 0 line = 0
for ix, raw_line in enumerate(self.raw): for ix, raw_line in enumerate(self.raw):
if counter + len(raw_line) > index: if counter + len(raw_line) + 1 > index:
line = ix line = ix
break break
else: else:
counter += len(raw_line) counter += len(raw_line) + 1
return Position(line, index - counter) print(counter, line)
return Position(line + 1, index - counter + 1)
@overload @overload
def locate(self, index: int) -> Position: def locate(self, index: int) -> Position:
@ -34,6 +35,7 @@ class Locator:
if isinstance(index, int): if isinstance(index, int):
return self._locate_pos(index) return self._locate_pos(index)
print(index.start, index.end)
start = self._locate_pos(index.start) start = self._locate_pos(index.start)
end = self._locate_pos(index.end) end = self._locate_pos(index.end)

View File

@ -1,5 +1,6 @@
from collections.abc import Iterable, Iterator
from typing import Any, Sequence from typing import Any, Sequence
from lsprotocol.types import Range from lsprotocol.types import DocumentSymbol, Range, SymbolKind
from parsimonious import ParseError from parsimonious import ParseError
from dataclasses import dataclass from dataclasses import dataclass
@ -28,21 +29,25 @@ class List(BaseToken):
value: list[BaseToken] value: list[BaseToken]
def flatten(xs: Iterable) -> Iterator[Any]:
for x in xs:
if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
yield from flatten(x)
else:
yield x
@dataclass @dataclass
class SkillVisitor(NodeVisitor): class SkillVisitor(NodeVisitor):
locator: Locator locator: Locator
def visit_skill(self, _: Node, visited_children: Sequence[Any]) -> list[BaseToken]: def visit_skill(self, _: Node, visited_children: Sequence[Any]) -> list[BaseToken]:
children = [] return list(flatten(visited_children))
for childlist in visited_children:
for child in childlist:
if isinstance(child, BaseToken):
children.append(child)
return children def visit_TOKEN(self, node: Node, _: Any) -> DocumentSymbol:
r = self.locator.locate(node)
def visit_TOKEN(self, node: Node, _: Any) -> Token: print(r)
return Token(self.locator.locate(node), node.text) return DocumentSymbol(node.text, SymbolKind.Property, r, r)
def visit_LITERAL(self, node: Node, visited_children: list[None | Node]) -> Literal: def visit_LITERAL(self, node: Node, visited_children: list[None | Node]) -> Literal:
value, *_ = visited_children value, *_ = visited_children
@ -103,7 +108,7 @@ class SkillVisitor(NodeVisitor):
return List(self.locator.locate(node), children) return List(self.locator.locate(node), children)
def visit_inline_assign(self, node: Node, visited_children: Sequence[Any]): def visit_inline_assign(self, node: Node, visited_children: Sequence[Any]):
print(node) return visited_children or node
def generic_visit( def generic_visit(
self, node: Node, visited_children: Sequence[Any] self, node: Node, visited_children: Sequence[Any]