Working on Formatting DSL completion
This commit is contained in:
0
src/myfasthtml/core/dsl/__init__.py
Normal file
0
src/myfasthtml/core/dsl/__init__.py
Normal file
84
src/myfasthtml/core/dsl/base.py
Normal file
84
src/myfasthtml/core/dsl/base.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""
|
||||
Base class for DSL definitions.
|
||||
|
||||
DSLDefinition provides the interface for defining domain-specific languages
|
||||
that can be used with the DslEditor control and CodeMirror.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from functools import cached_property
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from myfasthtml.core.dsl.lark_to_lezer import (
|
||||
lark_to_lezer_grammar,
|
||||
extract_completions_from_grammar,
|
||||
)
|
||||
|
||||
|
||||
class DSLDefinition(ABC):
    """
    Base class for DSL definitions.

    Subclasses must implement get_grammar() to provide the Lark grammar.
    The Lezer grammar and completions are automatically derived.

    Attributes:
        name: Human-readable name of the DSL.
    """

    name: str = "DSL"

    @abstractmethod
    def get_grammar(self) -> str:
        """
        Return the Lark grammar string for this DSL.

        Returns:
            The Lark grammar as a string.
        """
        pass

    @cached_property
    def lezer_grammar(self) -> str:
        """
        Return the Lezer grammar derived from the Lark grammar.

        This is cached after first computation.

        Returns:
            The Lezer grammar as a string.
        """
        return lark_to_lezer_grammar(self.get_grammar())

    @cached_property
    def completions(self) -> dict[str, list[str]]:
        """
        Return completion items extracted from the grammar.

        This is cached after first computation.

        Returns:
            Dictionary with completion categories:
            - 'keywords': Language keywords (if, not, and, etc.)
            - 'operators': Comparison and arithmetic operators
            - 'functions': Function-like constructs (style, format, etc.)
            - 'types': Type names (number, date, boolean, etc.)
            - 'literals': Literal values (True, False, etc.)
        """
        return extract_completions_from_grammar(self.get_grammar())

    def get_editor_config(self) -> dict[str, Any]:
        """
        Return the configuration for the DslEditor JavaScript initialization.

        Returns:
            Dictionary with:
            - 'name': The DSL name
            - 'lezerGrammar': The Lezer grammar string
            - 'completions': The completion items
        """
        return {
            "name": self.name,
            "lezerGrammar": self.lezer_grammar,
            "completions": self.completions,
        }
|
||||
172
src/myfasthtml/core/dsl/base_completion.py
Normal file
172
src/myfasthtml/core/dsl/base_completion.py
Normal file
@@ -0,0 +1,172 @@
|
||||
"""
|
||||
Base completion engine for DSL autocompletion.
|
||||
|
||||
Provides an abstract base class that specific DSL implementations
|
||||
can extend to provide context-aware autocompletion.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
from . import utils
|
||||
from .base_provider import BaseMetadataProvider
|
||||
from .types import Position, Suggestion, CompletionResult
|
||||
|
||||
|
||||
class BaseCompletionEngine(ABC):
    """
    Abstract base class for DSL completion engines.

    Subclasses must implement:
    - detect_scope(): Find the current scope from previous lines
    - detect_context(): Determine what kind of completion is expected
    - get_suggestions(): Generate suggestions for the detected context

    The main entry point is get_completions(), which orchestrates the flow.
    """

    def __init__(self, provider: BaseMetadataProvider):
        """
        Initialize the completion engine.

        Args:
            provider: Metadata provider for context-aware suggestions
        """
        self.provider = provider

    def get_completions(self, text: str, cursor: Position) -> CompletionResult:
        """
        Get autocompletion suggestions for the given cursor position.

        This is the main entry point. It:
        1. Checks if cursor is in a comment (no suggestions)
        2. Detects the current scope (e.g., which column)
        3. Detects the completion context (what kind of token is expected)
        4. Generates and filters suggestions

        Args:
            text: The full DSL document text
            cursor: Cursor position

        Returns:
            CompletionResult with suggestions and replacement range
        """
        # Full content of the cursor's line (used for comment and word checks).
        line = utils.get_line_at(text, cursor.line)

        # No suggestions inside comments.
        if utils.is_in_comment(line, cursor.ch):
            return self._empty_result(cursor)

        # Word boundaries determine the span a suggestion replaces;
        # the prefix (word start .. cursor) drives filtering below.
        word_range = utils.find_word_boundaries(line, cursor.ch)
        prefix = line[word_range.start: cursor.ch]

        # Scope is derived from the lines above the cursor.
        scope = self.detect_scope(text, cursor.line)

        # Context describes what kind of token is expected at the cursor.
        context = self.detect_context(text, cursor, scope)

        # Generate candidate suggestions for this context.
        suggestions = self.get_suggestions(context, scope, prefix)

        # Keep only suggestions matching what the user already typed.
        if prefix:
            suggestions = self._filter_suggestions(suggestions, prefix)

        # Build result with the replacement range of the current word.
        from_pos = Position(line=cursor.line, ch=word_range.start)
        to_pos = Position(line=cursor.line, ch=word_range.end)

        return CompletionResult(
            from_pos=from_pos,
            to_pos=to_pos,
            suggestions=suggestions,
        )

    @abstractmethod
    def detect_scope(self, text: str, current_line: int) -> Any:
        """
        Detect the current scope by scanning previous lines.

        The scope determines which data context we're in (e.g., which column
        for column values suggestions).

        Args:
            text: The full document text
            current_line: Current line number (0-based)

        Returns:
            Scope object (type depends on the specific DSL)
        """
        pass

    @abstractmethod
    def detect_context(self, text: str, cursor: Position, scope: Any) -> Any:
        """
        Detect the completion context at the cursor position.

        Analyzes the current line to determine what kind of token
        is expected (e.g., keyword, preset name, operator).

        Args:
            text: The full document text
            cursor: Cursor position
            scope: The detected scope

        Returns:
            Context identifier (type depends on the specific DSL)
        """
        pass

    @abstractmethod
    def get_suggestions(self, context: Any, scope: Any, prefix: str) -> list[Suggestion]:
        """
        Generate suggestions for the given context.

        Args:
            context: The detected completion context
            scope: The detected scope
            prefix: The current word prefix (for filtering)

        Returns:
            List of suggestions
        """
        pass

    def _filter_suggestions(
        self, suggestions: list[Suggestion], prefix: str
    ) -> list[Suggestion]:
        """
        Filter suggestions by prefix (case-insensitive).

        Args:
            suggestions: List of suggestions
            prefix: Prefix to filter by

        Returns:
            Filtered list of suggestions
        """
        prefix_lower = prefix.lower()
        return [s for s in suggestions if s.label.lower().startswith(prefix_lower)]

    def _empty_result(self, cursor: Position) -> CompletionResult:
        """
        Return an empty completion result.

        Args:
            cursor: Cursor position

        Returns:
            CompletionResult with no suggestions
        """
        return CompletionResult(
            from_pos=cursor,
            to_pos=cursor,
            suggestions=[],
        )

    def get_id(self) -> str:
        """Return a stable identifier for this engine: its class name."""
        return type(self).__name__
|
||||
38
src/myfasthtml/core/dsl/base_provider.py
Normal file
38
src/myfasthtml/core/dsl/base_provider.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""
|
||||
Base provider protocol for DSL autocompletion.
|
||||
|
||||
Defines the minimal interface that metadata providers must implement
|
||||
to support context-aware autocompletion.
|
||||
"""
|
||||
|
||||
from typing import Protocol
|
||||
|
||||
|
||||
class BaseMetadataProvider(Protocol):
    """
    Protocol describing the metadata interface used by completion engines.

    A metadata provider supplies the autocompletion engine with the
    context-specific data it needs (for example, which style or format
    presets exist). This is the minimal interface; concrete DSL
    implementations may define richer providers on top of it.
    """

    def get_style_presets(self) -> list[str]:
        """
        Return the names of the available style presets.

        Returns:
            Style preset names, e.g. ["primary", "error", "success"].
        """
        ...

    def get_format_presets(self) -> list[str]:
        """
        Return the names of the available format presets.

        Returns:
            Format preset names, e.g. ["EUR", "USD", "percentage"].
        """
        ...
|
||||
256
src/myfasthtml/core/dsl/lark_to_lezer.py
Normal file
256
src/myfasthtml/core/dsl/lark_to_lezer.py
Normal file
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
Utilities for converting Lark grammars to Lezer format and extracting completions.
|
||||
|
||||
This module provides functions to:
|
||||
1. Transform a Lark grammar to a Lezer grammar for CodeMirror
|
||||
2. Extract completion items (keywords, operators, etc.) from a Lark grammar
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Dict, List, Set
|
||||
|
||||
|
||||
def lark_to_lezer_grammar(lark_grammar: str) -> str:
    """
    Convert a Lark grammar to a Lezer grammar.

    This is a simplified converter that handles common Lark patterns.
    Complex grammars may require manual adjustment.

    Args:
        lark_grammar: The Lark grammar string.

    Returns:
        The Lezer grammar string.
    """
    rule_pattern = re.compile(r"^([a-z_][a-z0-9_]*)\s*:\s*(.+)$")
    terminal_pattern = re.compile(r"^([A-Z_][A-Z0-9_]*)\s*:\s*(.+)$")

    converted_rules = []
    converted_tokens = []

    for raw_line in lark_grammar.strip().split("\n"):
        stripped = raw_line.strip()

        # Skip blanks, comments, and Lark directives (%import, %ignore, ...).
        if not stripped or stripped.startswith(("//", "#", "%")):
            continue

        # Rule definitions use lowercase names.
        rule = rule_pattern.match(stripped)
        if rule:
            converted = _convert_rule(*rule.groups())
            if converted:
                converted_rules.append(converted)
            continue

        # Terminal definitions use uppercase names.
        terminal = terminal_pattern.match(stripped)
        if terminal:
            converted = _convert_terminal(*terminal.groups())
            if converted:
                converted_tokens.append(converted)

    # Assemble the Lezer grammar: top rule, rules, token block, skip rule.
    output = ["@top Start { scope+ }", ""]
    output.extend(converted_rules)
    output.append("")
    output.append("@tokens {")
    output.extend(f" {token}" for token in converted_tokens)
    output.extend([
        ' whitespace { $[ \\t]+ }',
        ' newline { $[\\n\\r] }',
        ' Comment { "#" ![$\\n]* }',
    ])
    output.append("}")
    output.append("")
    output.append("@skip { whitespace | Comment }")

    return "\n".join(output)
|
||||
|
||||
|
||||
def _convert_rule(name: str, body: str) -> str:
    """Convert a single Lark rule to Lezer format.

    Internal rules (leading underscore) and rules whose body converts
    to nothing yield an empty string.
    """
    # Internal Lark rules are not exposed in the Lezer grammar.
    if name.startswith("_"):
        return ""

    converted_body = _convert_body(body)
    if not converted_body:
        return ""

    # Lezer rule names use PascalCase.
    return f"{_to_pascal_case(name)} {{ {converted_body} }}"
|
||||
|
||||
|
||||
def _convert_terminal(name: str, pattern: str) -> str:
|
||||
"""Convert a Lark terminal to Lezer token format."""
|
||||
pattern = pattern.strip()
|
||||
|
||||
# Handle regex patterns
|
||||
if pattern.startswith("/") and pattern.endswith("/"):
|
||||
regex = pattern[1:-1]
|
||||
# Convert to Lezer regex format
|
||||
return f'{name} {{ ${regex}$ }}'
|
||||
|
||||
# Handle string literals
|
||||
if pattern.startswith('"') or pattern.startswith("'"):
|
||||
return f'{name} {{ {pattern} }}'
|
||||
|
||||
# Handle alternatives (literal strings separated by |)
|
||||
if "|" in pattern:
|
||||
alternatives = [alt.strip() for alt in pattern.split("|")]
|
||||
if all(alt.startswith('"') or alt.startswith("'") for alt in alternatives):
|
||||
return f'{name} {{ {" | ".join(alternatives)} }}'
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def _convert_body(body: str) -> str:
    """Convert the body of a Lark rule to Lezer format.

    Strips Lark alias annotations (-> name), converts each "|"
    alternative, and joins the non-empty results.
    """
    # Drop inline aliases such as "-> branch_name".
    cleaned = re.sub(r"\s*->\s*\w+", "", body)

    alternatives = (alt.strip() for alt in cleaned.split("|"))
    converted = (_convert_sequence(alt) for alt in alternatives if alt)
    return " | ".join(part for part in converted if part)
|
||||
|
||||
|
||||
def _convert_sequence(seq: str) -> str:
    """Convert one alternative (a sequence of items) of a Lark rule.

    Recognizes string literals, (...) groups, [...] optional groups,
    identifiers, and the ?/*/+ quantifiers (which attach to the
    preceding item).
    """
    token_re = (
        r'"[^"]*"|\'[^\']*\'|/[^/]+/|\([^)]+\)|\[[^\]]+\]'
        r"|[a-zA-Z_][a-zA-Z0-9_]*|\?|\*|\+"
    )

    converted: list[str] = []
    for piece in re.findall(token_re, seq):
        head = piece[0]
        if head in ('"', "'"):
            # String literal: keep verbatim.
            converted.append(piece)
        elif head == "(":
            # Group: convert the inner body recursively.
            converted.append(f"({_convert_body(piece[1:-1])})")
        elif head == "[":
            # Lark optional group -> (...)? in Lezer.
            converted.append(f"({_convert_body(piece[1:-1])})?")
        elif piece in ("?", "*", "+"):
            # Quantifier: attach to the previous item, if any.
            if converted:
                converted[-1] += piece
        elif piece.isupper() or head == "_":
            # Terminal reference: keep the name as-is.
            converted.append(piece)
        elif piece.islower() or "_" in piece:
            # Rule reference: rename to PascalCase.
            converted.append(_to_pascal_case(piece))

    return " ".join(converted)
|
||||
|
||||
|
||||
def _to_pascal_case(name: str) -> str:
|
||||
"""Convert snake_case to PascalCase."""
|
||||
return "".join(word.capitalize() for word in name.split("_"))
|
||||
|
||||
|
||||
def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
    """
    Extract completion items from a Lark grammar.

    Parses the grammar to find:
    - Keywords (reserved words like if, not, and)
    - Operators (==, !=, contains, etc.)
    - Functions (style, format, etc.)
    - Types (number, date, boolean, etc.)
    - Literals (True, False, etc.)

    Args:
        lark_grammar: The Lark grammar string.

    Returns:
        Dictionary with completion categories; each value is a sorted list.
    """
    keywords: Set[str] = set()
    operators: Set[str] = set()
    functions: Set[str] = set()
    types: Set[str] = set()
    literals: Set[str] = set()

    # Every double-quoted string in the grammar is a candidate keyword,
    # operator, literal, etc. This single pass already covers terminal
    # definitions such as BOOLEAN: "True" | "False", so no separate
    # terminal-extraction pass is needed.
    quoted_strings = re.findall(r'"([^"]+)"', lark_grammar)

    for s in quoted_strings:
        s_lower = s.lower()

        # Classify against known token sets.
        if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"):
            operators.add(s)
        elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"):
            operators.add(s_lower)
        elif s_lower in ("if", "not", "and", "or"):
            keywords.add(s_lower)
        elif s_lower in ("true", "false"):
            # Preserve the original casing of boolean literals.
            literals.add(s)
        elif s_lower in ("style", "format"):
            functions.add(s_lower)
        elif s_lower in ("column", "row", "cell", "value", "col"):
            keywords.add(s_lower)
        elif s_lower in ("number", "date", "boolean", "text", "enum"):
            types.add(s_lower)
        elif s_lower == "case":
            keywords.add(s_lower)

    # Function-like patterns: a quoted word immediately followed by "(".
    function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar)
    for func in function_patterns:
        if func.lower() not in ("true", "false"):
            functions.add(func.lower())

    # Additional type names declared in a format_type rule, if present.
    type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
    if type_match:
        type_strings = re.findall(r'"(\w+)"', type_match.group(1))
        types.update(t.lower() for t in type_strings)

    return {
        "keywords": sorted(keywords),
        "operators": sorted(operators),
        "functions": sorted(functions),
        "types": sorted(types),
        "literals": sorted(literals),
    }
|
||||
103
src/myfasthtml/core/dsl/types.py
Normal file
103
src/myfasthtml/core/dsl/types.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
Base types for DSL autocompletion.
|
||||
|
||||
Provides dataclasses for cursor position, suggestions, and completion results
|
||||
compatible with CodeMirror 5.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Position:
    """
    Immutable cursor position, matching CodeMirror 5's {line, ch} format.

    Attributes:
        line: 0-based line number.
        ch: 0-based character offset within the line.
    """

    line: int
    ch: int

    def to_dict(self) -> dict[str, int]:
        """Serialize to the CodeMirror position dictionary."""
        return {"line": self.line, "ch": self.ch}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Suggestion:
    """
    One autocompletion entry.

    Attributes:
        label: Text that is displayed and inserted.
        detail: Optional description shown next to the label.
        kind: Optional category tag (e.g. "keyword", "preset", "value").
    """

    label: str
    detail: str = ""
    kind: str = ""

    def to_dict(self) -> dict[str, str]:
        """Serialize for JSON; empty detail/kind fields are omitted."""
        payload = {"label": self.label}
        for key, value in (("detail", self.detail), ("kind", self.kind)):
            if value:
                payload[key] = value
        return payload
|
||||
|
||||
|
||||
@dataclass
class CompletionResult:
    """
    Result of an autocompletion request (CodeMirror 5 hint format).

    Attributes:
        from_pos: Start of the text span to replace.
        to_pos: End of the text span to replace.
        suggestions: Completion suggestions for that span.
    """

    from_pos: Position
    to_pos: Position
    suggestions: list[Suggestion] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to the CodeMirror-compatible hint dictionary."""
        return {
            "from": self.from_pos.to_dict(),
            "to": self.to_pos.to_dict(),
            "suggestions": [item.to_dict() for item in self.suggestions],
        }

    @property
    def is_empty(self) -> bool:
        """True when no suggestions were produced."""
        return not self.suggestions
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WordRange:
    """
    Span of a single word within a line.

    Determines which text is replaced when a suggestion is applied.

    Attributes:
        start: Start character offset (inclusive).
        end: End character offset (exclusive).
        text: The word itself.
    """

    start: int
    end: int
    text: str = ""
|
||||
226
src/myfasthtml/core/dsl/utils.py
Normal file
226
src/myfasthtml/core/dsl/utils.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""
|
||||
Shared utilities for DSL autocompletion.
|
||||
|
||||
Provides helper functions for text analysis, word boundary detection,
|
||||
and other common operations used by completion engines.
|
||||
"""
|
||||
|
||||
from .types import Position, WordRange
|
||||
|
||||
# Delimiters used to detect word boundaries
|
||||
DELIMITERS = set('"\' ()[]{}=,:<>!\t\n\r')
|
||||
|
||||
|
||||
def get_line_at(text: str, line_number: int) -> str:
    """
    Return the content of one line of *text*.

    Args:
        text: The full document text.
        line_number: 0-based line index.

    Returns:
        The line's content, or "" when the index is out of range.
    """
    all_lines = text.split("\n")
    if line_number < 0 or line_number >= len(all_lines):
        return ""
    return all_lines[line_number]
|
||||
|
||||
|
||||
def get_line_up_to_cursor(text: str, cursor: Position) -> str:
    """
    Return the cursor's line truncated at the cursor column.

    Args:
        text: The full document text.
        cursor: Cursor position.

    Returns:
        The line content from its start up to (excluding) cursor.ch.
    """
    return get_line_at(text, cursor.line)[: cursor.ch]
|
||||
|
||||
|
||||
def get_lines_up_to(text: str, line_number: int) -> list[str]:
    """
    Return all lines from the document start through *line_number*.

    Args:
        text: The full document text.
        line_number: 0-based index of the last line to include.

    Returns:
        Lines 0..line_number (inclusive).
    """
    return text.split("\n")[: line_number + 1]
|
||||
|
||||
|
||||
def find_word_boundaries(line: str, cursor_ch: int) -> WordRange:
    """
    Locate the word surrounding *cursor_ch* in *line*.

    A word is a maximal run of non-delimiter characters; the cursor may
    sit anywhere inside it.

    Args:
        line: The line content.
        cursor_ch: Cursor character offset within the line.

    Returns:
        WordRange covering the word; an empty range for an empty line
        or a negative cursor position.
    """
    if not line or cursor_ch < 0:
        return WordRange(start=cursor_ch, end=cursor_ch, text="")

    # Keep the cursor inside the line.
    pos = min(cursor_ch, len(line))

    # Scan left to the word start...
    left = pos
    while left > 0 and line[left - 1] not in DELIMITERS:
        left -= 1

    # ...and right to the word end.
    right = pos
    while right < len(line) and line[right] not in DELIMITERS:
        right += 1

    return WordRange(start=left, end=right, text=line[left:right])
|
||||
|
||||
|
||||
def get_prefix(line: str, cursor_ch: int) -> str:
    """
    Return the part of the current word that lies before the cursor.

    Args:
        line: The line content.
        cursor_ch: Cursor character offset within the line.

    Returns:
        Text from the word's start up to the cursor.
    """
    boundaries = find_word_boundaries(line, cursor_ch)
    return line[boundaries.start: cursor_ch]
|
||||
|
||||
|
||||
def is_in_comment(line: str, cursor_ch: int) -> bool:
    """
    Return True when the cursor sits after an unquoted '#' on *line*.

    A comment starts at the first '#' that is not inside a string
    literal and extends to the end of the line.

    Args:
        line: The line content.
        cursor_ch: Cursor character offset within the line.

    Returns:
        True if the cursor is inside a comment.
    """
    open_quote: str | None = None

    for index, ch in enumerate(line[:cursor_ch]):
        # Backslash-escaped quotes do not open or close a string.
        escaped = index > 0 and line[index - 1] == "\\"
        if ch in ('"', "'") and not escaped:
            if open_quote is None:
                open_quote = ch
            elif ch == open_quote:
                open_quote = None
        elif ch == "#" and open_quote is None:
            return True

    return False
|
||||
|
||||
|
||||
def is_in_string(line: str, cursor_ch: int) -> tuple[bool, str | None]:
|
||||
"""
|
||||
Check if the cursor is inside a string literal.
|
||||
|
||||
Args:
|
||||
line: The line content
|
||||
cursor_ch: Cursor character position in the line
|
||||
|
||||
Returns:
|
||||
Tuple of (is_in_string, quote_char)
|
||||
quote_char is '"' or "'" if inside a string, None otherwise
|
||||
"""
|
||||
in_string = False
|
||||
string_char = None
|
||||
|
||||
for i, char in enumerate(line):
|
||||
if i >= cursor_ch:
|
||||
break
|
||||
|
||||
if char in ('"', "'") and (i == 0 or line[i - 1] != "\\"):
|
||||
if not in_string:
|
||||
in_string = True
|
||||
string_char = char
|
||||
elif char == string_char:
|
||||
in_string = False
|
||||
string_char = None
|
||||
|
||||
return in_string, string_char if in_string else None
|
||||
|
||||
|
||||
def get_indentation(line: str) -> int:
    """
    Return the indentation width of *line* in spaces.

    Tabs count as 4 spaces; counting stops at the first
    non-indentation character.

    Args:
        line: The line content.

    Returns:
        The number of leading space-equivalents.
    """
    widths = {" ": 1, "\t": 4}
    total = 0
    for ch in line:
        if ch not in widths:
            break
        total += widths[ch]
    return total
|
||||
|
||||
|
||||
def is_indented(line: str) -> bool:
    """
    Return True when *line* begins with a space or tab.

    Args:
        line: The line content.

    Returns:
        True if the first character is indentation whitespace.
    """
    # line[:1] is "" for an empty line, which matches neither quote.
    return line[:1] in (" ", "\t")
|
||||
|
||||
|
||||
def strip_quotes(text: str) -> str:
    """
    Remove one pair of matching surrounding quotes, if present.

    Args:
        text: Text that may be wrapped in single or double quotes.

    Returns:
        The unquoted text, or *text* unchanged when it is not wrapped
        in a matching quote pair.
    """
    if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
        return text[1:-1]
    return text
|
||||
Reference in New Issue
Block a user