replace peg parser with iterative approach

This commit is contained in:
Patrick 2023-10-19 22:06:08 +02:00 committed by AcerecA
parent a924ca5710
commit 9e63218572
5 changed files with 232 additions and 37 deletions

View File

@ -1,3 +1,14 @@
example = nil
example = nil
example2 = example
(
example[qdqoifq]
(let (some vars (default 0))
; ... some wall of text
"))"
wqdqwf = '(doqwf)
qqvwv
)
)

View File

@ -1,19 +1,33 @@
from logging import INFO, basicConfig, getLogger
from pathlib import Path
from time import time
from lsprotocol.types import (
TEXT_DOCUMENT_DIAGNOSTIC,
TEXT_DOCUMENT_DID_CHANGE,
TEXT_DOCUMENT_DID_OPEN,
TEXT_DOCUMENT_DID_SAVE,
TEXT_DOCUMENT_DOCUMENT_SYMBOL,
TEXT_DOCUMENT_PUBLISH_DIAGNOSTICS,
CompletionItem,
Diagnostic,
DiagnosticSeverity,
DidChangeTextDocumentParams,
DidOpenTextDocumentParams,
DidSaveTextDocumentParams,
DocumentDiagnosticParams,
DocumentDiagnosticReport,
DocumentSymbol,
DocumentSymbolParams,
Position,
Range,
RelatedFullDocumentDiagnosticReport,
)
from pygls.server import LanguageServer
from parsimonious import Grammar, IncompleteParseError
from skillls.parsing.iterative import IterativeParser
from .cache import Cache
from .parsing.tokenize import Locator, SkillVisitor
@ -31,7 +45,7 @@ def parse(content: str):
path = Path(__file__).parent / "grammar.peg"
grammar = Grammar(path.read_text())
locator = Locator(content)
locator = Locator(content.split())
tree = grammar.parse(content)
iv = SkillVisitor(locator)
@ -40,27 +54,57 @@ def parse(content: str):
return output
@server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]:
logger.warning("requested document symbols for %s", params.text_document.uri)
# @server.feature(TEXT_DOCUMENT_DOCUMENT_SYMBOL)
# def document_symbols(params: DocumentSymbolParams) -> list[DocumentSymbol]:
# logger.warning("requested document symbols for %s", params.text_document.uri)
# doc = server.workspace.get_text_document(params.text_document.uri)
# try:
# logger.warning(parse(doc.source))
# except IncompleteParseError as e:
# server.publish_diagnostics(
# params.text_document.uri,
# [
# Diagnostic(
# Range(
# Position(e.line() - 1, e.column() - 1),
# Position(len(doc.lines), 0),
# ),
# str(e),
# severity=DiagnosticSeverity.Error,
# )
# ],
# )
# return []
# @server.feature(TEXT_DOCUMENT_DIAGNOSTIC)
# def document_diagnostic(params: DocumentDiagnosticParams) -> DocumentDiagnosticReport:
# doc = server.workspace.get_text_document(params.text_document.uri)
# p = IterativeParser()
# # parsed = (e for e in parse(doc.source) if isinstance(e, DocumentSymbol))
# # diags = [
# # Diagnostic(e.range, e.name, severity=DiagnosticSeverity.Error) for e in parsed
# # ]
# diags = p(doc.lines)
# return RelatedFullDocumentDiagnosticReport(diags)
@server.feature(TEXT_DOCUMENT_DID_OPEN)
def on_open(params: DidOpenTextDocumentParams) -> None:
doc = server.workspace.get_text_document(params.text_document.uri)
try:
logger.warning(parse(doc.source))
except IncompleteParseError as e:
server.publish_diagnostics(
params.text_document.uri,
[
Diagnostic(
Range(
Position(e.line() - 1, e.column() - 1),
Position(len(doc.lines), 0),
),
str(e),
severity=DiagnosticSeverity.Error,
)
],
)
return []
p = IterativeParser()
diags = p(doc.lines)
server.publish_diagnostics(params.text_document.uri, diags, version=int(time()))
@server.feature(TEXT_DOCUMENT_DID_SAVE)
def on_save(params: DidSaveTextDocumentParams) -> None:
    """Re-check bracket balance of a saved document and publish the result.

    Args:
        params: The ``textDocument/didSave`` notification payload; only the
            document URI is used.
    """
    uri = params.text_document.uri

    # Drop the workspace's copy before fetching it again.
    # NOTE(review): presumably this forces a fresh read of the saved file
    # instead of reusing stale in-memory content -- confirm against pygls.
    server.workspace.remove_text_document(uri)
    doc = server.workspace.get_text_document(uri)

    diags = IterativeParser()(doc.lines)
    logger.warning(doc.source)

    # The wall-clock second is used as the diagnostics version number.
    server.publish_diagnostics(uri, diags, version=int(time()))
def main():
@ -69,4 +113,4 @@ def main():
if __name__ == "__main__":
file = Path(__file__).parent.parent / "examples" / "example.il"
parse(file.read_text())
out = parse(file.read_text())

View File

@ -0,0 +1,133 @@
from dataclasses import dataclass, field
from enum import Enum, auto
from logging import getLogger
from typing import NamedTuple, Self
from lsprotocol.types import Diagnostic, DiagnosticSeverity, Position, Range
logger = getLogger(__name__)
class Pair(NamedTuple):
    """Opening and closing delimiter characters of one bracket kind."""

    start: str  # opening character, e.g. "("
    end: str  # closing character, e.g. ")"
class SyntaxPair(Enum):
    """Bracket kinds that are balanced, each carrying its delimiter ``Pair``."""

    Paren = Pair("(", ")")
    Square = Pair("[", "]")

    @classmethod
    def by_start_elem(cls, start: str) -> Self:
        """Return the member whose opening character equals ``start``.

        Raises:
            ValueError: If no member opens with ``start``.
        """
        found = next((member for member in cls if member.value.start == start), None)
        if found is None:
            raise ValueError(f"`{start}` not a valid start character")
        return found

    @classmethod
    def by_end_elem(cls, end: str) -> Self:
        """Return the member whose closing character equals ``end``.

        Raises:
            ValueError: If no member closes with ``end``.
        """
        found = next((member for member in cls if member.value.end == end), None)
        if found is None:
            raise ValueError(f"`{end}` not a valid end character")
        return found
def char_range(line: int, char: int) -> Range:
    """Build the range spanning the single character at ``(line, char)``.

    The end position is exclusive, so it sits one column past ``char``.
    """
    begin = Position(line, char)
    end = Position(line, char + 1)
    return Range(begin, end)
def pair_mismatch(line: int, char: int, msg: str) -> Diagnostic:
    """Create an error-severity diagnostic on the character at ``(line, char)``.

    Args:
        line: Line index of the offending character.
        char: Column index of the offending character.
        msg: Human-readable message attached to the diagnostic.
    """
    where = char_range(line, char)
    return Diagnostic(where, msg, severity=DiagnosticSeverity.Error)
class StackElement(NamedTuple):
    """An opening bracket that has been seen but not yet closed."""

    range: Range  # location of the opening character
    elem: SyntaxPair  # which bracket kind was opened
@dataclass()
class IterativeParser:
_stack: list[StackElement] = field(default_factory=list)
def peek(self) -> StackElement:
return self._stack[-1]
def pop(self) -> StackElement:
return self._stack.pop()
def push(self, pair: StackElement) -> None:
return self._stack.append(pair)
def __call__(self, raw: list[str]) -> list[Diagnostic]:
in_string = False
errs = []
for line, raw_line in enumerate(raw):
for char, raw_char in enumerate(raw_line):
match raw_char:
case ";":
if not in_string:
break
case '"':
in_string = not in_string
case "(" | "[":
if not in_string:
self.push(
StackElement(
char_range(line, char),
SyntaxPair.by_start_elem(raw_char),
)
)
case "]" | ")":
if not in_string:
if not self._stack:
errs.append(
pair_mismatch(
line, char, f"one {raw_char} too much"
)
)
continue
expected = SyntaxPair.by_end_elem(raw_char)
elem = self._stack.pop()
if elem.elem == expected:
continue
if self._stack and self._stack[-1].elem == expected:
errs.append(
pair_mismatch(
line, char, f"unclosed {elem.elem.value.start}"
)
)
self._stack.pop()
self._stack.append(elem)
else:
errs.append(
pair_mismatch(
line, char, f"one {raw_char} too much"
)
)
self._stack.append(elem)
for rest in self._stack:
errs.append(
Diagnostic(
rest.range,
f"unclosed {rest.elem.value.start}",
severity=DiagnosticSeverity.Error,
)
)
self._stack = []
return errs
if __name__ == "__main__":
    # Manual smoke run: print the diagnostics for one unbalanced sample line.
    sample_lines = ["((([]]))"]
    print(IterativeParser()(sample_lines))

View File

@ -14,13 +14,14 @@ class Locator:
counter = 0
line = 0
for ix, raw_line in enumerate(self.raw):
if counter + len(raw_line) > index:
if counter + len(raw_line) + 1 > index:
line = ix
break
else:
counter += len(raw_line)
counter += len(raw_line) + 1
return Position(line, index - counter)
print(counter, line)
return Position(line + 1, index - counter + 1)
@overload
def locate(self, index: int) -> Position:
@ -34,6 +35,7 @@ class Locator:
if isinstance(index, int):
return self._locate_pos(index)
print(index.start, index.end)
start = self._locate_pos(index.start)
end = self._locate_pos(index.end)

View File

@ -1,5 +1,6 @@
from collections.abc import Iterable, Iterator
from typing import Any, Sequence
from lsprotocol.types import Range
from lsprotocol.types import DocumentSymbol, Range, SymbolKind
from parsimonious import ParseError
from dataclasses import dataclass
@ -28,21 +29,25 @@ class List(BaseToken):
value: list[BaseToken]
def flatten(xs: Iterable) -> Iterator[Any]:
    """Lazily yield the leaves of an arbitrarily nested iterable, depth-first.

    Strings and bytes count as leaves and are yielded whole when nested;
    every other iterable element is descended into.
    """
    # Stack of partially consumed iterators, innermost last.
    pending = [iter(xs)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, (str, bytes)) or not isinstance(item, Iterable):
                yield item
            else:
                # Descend; the outer iterator resumes once this one is done.
                pending.append(iter(item))
                break
        else:
            # Innermost iterator exhausted: go back up one level.
            pending.pop()
@dataclass
class SkillVisitor(NodeVisitor):
locator: Locator
def visit_skill(self, _: Node, visited_children: Sequence[Any]) -> list[BaseToken]:
children = []
for childlist in visited_children:
for child in childlist:
if isinstance(child, BaseToken):
children.append(child)
return list(flatten(visited_children))
return children
def visit_TOKEN(self, node: Node, _: Any) -> Token:
return Token(self.locator.locate(node), node.text)
def visit_TOKEN(self, node: Node, _: Any) -> DocumentSymbol:
r = self.locator.locate(node)
print(r)
return DocumentSymbol(node.text, SymbolKind.Property, r, r)
def visit_LITERAL(self, node: Node, visited_children: list[None | Node]) -> Literal:
value, *_ = visited_children
@ -103,7 +108,7 @@ class SkillVisitor(NodeVisitor):
return List(self.locator.locate(node), children)
def visit_inline_assign(self, node: Node, visited_children: Sequence[Any]):
print(node)
return visited_children or node
def generic_visit(
self, node: Node, visited_children: Sequence[Any]