From 3de76e196ca75188161799d61c39f045e6602b15 Mon Sep 17 00:00:00 2001 From: AcerecA Date: Fri, 19 Jun 2026 11:01:37 +0200 Subject: [PATCH] add agents md --- AGENTS.md | 35 +++++++++++++ skillls/parser.py | 123 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 AGENTS.md create mode 100644 skillls/parser.py diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..d6bfef5 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,35 @@ +# Project Overview: skillls + +`skillls` is a Language Server Protocol (LSP) implementation for the **Skill** language (specifically targeting `.il` and `.ocn` files). It provides essential IDE features to enhance the development experience, such as error detection, structural navigation, and intelligent code hints. + +## Core Capabilities + +The server implements several key LSP features: + +- **Diagnostics**: Automatically detects syntax errors, specifically focusing on parenthesis mismatches (too many opening or closing parentheses), and reports them with precise line/column information to the editor. +- **Document Symbols**: Parses the file structure to generate a hierarchy of scopes (nodes). This enables editors to provide an "Outline" or "Symbol Tree" view for navigating functions, variables, and namespaces. +- **Inlay Hints**: Provides inline metadata at specific code locations, allowing the editor to display additional context directly within the source text. +- **Workspace Initialization**: Upon connecting, the server scans the workspace root for relevant `.il` and `.ocn` files, building an initial representation of the project's scopes. + +## Architecture & Implementation + +### Parsing Logic +The project uses a multi-layered approach to understand the Skill language: +1. **Content Cleaning**: A pre-processing step identifies and handles comments (`;`) and strings (`"..."`) to ensure parsing is not misled by ignored text. +2. 
class SkillParser:
    """Tree-sitter based parser for the Skill language.

    Parses a document into a concrete syntax tree (CST) and walks it once to
    produce LSP diagnostics (syntax errors) and a flat list of document
    symbols (outline entries).

    NOTE(review): tree-sitter reports columns as byte offsets, whereas LSP
    character offsets default to UTF-16 code units. The two agree for ASCII
    lines only -- confirm the position encoding negotiated with the client.
    """

    # Node types reported as outline symbols. Placeholder set: the real names
    # depend on the tree-sitter-skill grammar.
    _SYMBOL_NODE_TYPES = frozenset(
        {"function_definition", "procedure_definition", "namespace", "let_binding"}
    )
    # Child node types whose text is used as the symbol name.
    _NAME_NODE_TYPES = frozenset({"identifier", "name"})

    def __init__(self):
        """Create the tree-sitter language and a parser bound to it."""
        # The grammar binding returns a raw language handle; it has to be
        # wrapped in ``Language`` before a parser can use it.
        self.language = Language(tree_sitter_skill.language())
        self.parser = Parser(self.language)

    def parse_document(
        self, text_document: TextDocument
    ) -> tuple[list[Diagnostic], list[DocumentSymbol]]:
        """Parse *text_document* and return ``(diagnostics, symbols)``.

        An empty document yields two empty lists without invoking the parser.
        """
        content = text_document.source
        if not content:
            return [], []

        tree = self.parser.parse(content.encode("utf-8"))

        diagnostics: list[Diagnostic] = []
        symbols: list[DocumentSymbol] = []
        self._traverse_tree(tree.root_node, content, diagnostics, symbols)
        return diagnostics, symbols

    def _traverse_tree(
        self,
        node,
        content: str,
        diagnostics: list[Diagnostic],
        symbols: list[DocumentSymbol],
    ) -> None:
        """Walk the tree rooted at *node* in pre-order, filling both lists.

        Diagnostics are appended to *diagnostics* and outline entries to
        *symbols*, in document order. The walk uses an explicit stack so that
        deeply nested forms cannot exhaust Python's recursion limit.
        """
        pending = [node]
        while pending:
            current = pending.pop()

            diagnostic = self._create_diagnostic(current)
            if diagnostic is not None:
                diagnostics.append(diagnostic)

            if self._is_symbol_node(current):
                symbol = self._create_document_symbol(current, content)
                if symbol:
                    symbols.append(symbol)

            # Reversed so the left-most child is popped (visited) first.
            pending.extend(reversed(current.children))

    def _create_diagnostic(self, node) -> Diagnostic | None:
        """Return an error diagnostic for *node*, or ``None`` if it is fine.

        Missing tokens are flagged through ``node.is_missing``; such nodes
        carry the type of the token that was expected, never a literal
        ``"MISSING"`` type.
        """
        if node.is_missing:
            message = f"Syntax error: missing {node.type}"
        elif node.type == "ERROR":
            message = f"Syntax error: unexpected {node.type} token"
        else:
            return None

        return Diagnostic(
            range=self._point_range(node.start_point, node.end_point),
            message=message,
            severity=DiagnosticSeverity.Error,
        )

    def _is_symbol_node(self, node) -> bool:
        """Return whether *node* should appear in the document outline."""
        return node.type in self._SYMBOL_NODE_TYPES or node.type.endswith("_def")

    def _create_document_symbol(self, node, content: str) -> DocumentSymbol | None:
        """Build a ``DocumentSymbol`` for *node*.

        The name is the text of the first ``identifier``/``name`` child and
        the selection range covers that child. When there is no such child
        (or its text is empty) the node type is used as the name, and when
        there is no such child the selection range collapses to the node's
        start.
        """
        name_node = next(
            (child for child in node.children if child.type in self._NAME_NODE_TYPES),
            None,
        )

        name = self._node_text(name_node, content) if name_node is not None else ""
        if not name:
            # Fall back to the node type so the symbol name is never empty.
            name = node.type

        if name_node is not None:
            selection_range = self._point_range(
                name_node.start_point, name_node.end_point
            )
        else:
            selection_range = self._point_range(node.start_point, node.start_point)

        return DocumentSymbol(
            name=name,
            # Defaulting to Function until node types can be mapped to kinds.
            kind=SymbolKind.Function,
            range=self._point_range(node.start_point, node.end_point),
            selection_range=selection_range,
        )

    @staticmethod
    def _node_text(node, content: str) -> str:
        """Return the source text covered by *node*.

        ``start_byte``/``end_byte`` are offsets into the UTF-8 encoded
        source, so the slice is taken on bytes rather than on *content*
        itself; slicing the ``str`` is wrong as soon as a multi-byte
        character precedes the node.
        """
        raw = getattr(node, "text", None)
        if raw is None:
            raw = content.encode("utf-8")[node.start_byte:node.end_byte]
        return raw.decode("utf-8", errors="replace")

    @staticmethod
    def _point_range(start_point, end_point) -> Range:
        """Convert two tree-sitter ``(row, column)`` points to an LSP range."""
        return Range(
            start=Position(line=start_point[0], character=start_point[1]),
            end=Position(line=end_point[0], character=end_point[1]),
        )