Added syntax colorization. Removed all references to the deprecated lark_to_lezer module.

This commit is contained in:
2026-02-07 11:08:34 +01:00
parent 1c1ced2a9f
commit ab4f251f0c
7 changed files with 106 additions and 461 deletions

View File

@@ -9,10 +9,7 @@ from abc import ABC, abstractmethod
from functools import cached_property
from typing import List, Dict, Any
# TODO: Replace with lark_to_simple_mode when implemented
from myfasthtml.core.dsl.lark_to_lezer import (
extract_completions_from_grammar, # Will be moved to utils.py
)
from myfasthtml.core.dsl.lark_to_simple_mode import extract_completions_from_grammar
from myfasthtml.core.utils import make_safe_id
@@ -82,13 +79,13 @@ class DSLDefinition(ABC):
Returns:
Dictionary with:
- 'lezerGrammar': The Lezer grammar string
- 'simpleModeConfig': The CodeMirror Simple Mode configuration
- 'completions': The completion items
- 'name': The DSL name
"""
return {
"name": self.name,
"lezerGrammar": self.lezer_grammar,
"simpleModeConfig": self.simple_mode_config,
"completions": self.completions,
}

View File

@@ -1,267 +0,0 @@
# """
# DEPRECATED: Utilities for converting Lark grammars to Lezer format.
#
# ⚠️ WARNING: This module is deprecated and will be removed in a future version.
#
# Original purpose:
# - Transform a Lark grammar to a Lezer grammar for CodeMirror 6
# - Extract completion items (keywords, operators, etc.) from a Lark grammar
#
# Deprecation reason:
# - CodeMirror 6 requires a bundler (Webpack, Rollup, etc.)
# - Incompatible with FastHTML's direct script inclusion approach
# - Replaced by CodeMirror 5 Simple Mode (see lark_to_simple_mode.py)
#
# Migration path:
# - Use lark_to_simple_mode.py for CodeMirror 5 syntax highlighting
# - extract_completions_from_grammar() is still used and will be moved to utils.py
# """
#
# import re
# from typing import Dict, List, Set
#
#
# def lark_to_lezer_grammar(lark_grammar: str) -> str:
# """
# Convert a Lark grammar to a Lezer grammar.
#
# This is a simplified converter that handles common Lark patterns.
# Complex grammars may require manual adjustment.
#
# Args:
# lark_grammar: The Lark grammar string.
#
# Returns:
# The Lezer grammar string.
# """
# lines = lark_grammar.strip().split("\n")
# lezer_rules = []
# tokens = []
#
# for line in lines:
# line = line.strip()
#
# # Skip empty lines and comments
# if not line or line.startswith("//") or line.startswith("#"):
# continue
#
# # Skip Lark-specific directives
# if line.startswith("%"):
# continue
#
# # Parse rule definitions (lowercase names only)
# rule_match = re.match(r"^([a-z_][a-z0-9_]*)\s*:\s*(.+)$", line)
# if rule_match:
# name, body = rule_match.groups()
# lezer_rule = _convert_rule(name, body)
# if lezer_rule:
# lezer_rules.append(lezer_rule)
# continue
#
# # Parse terminal definitions (uppercase names)
# terminal_match = re.match(r"^([A-Z_][A-Z0-9_]*)\s*:\s*(.+)$", line)
# if terminal_match:
# name, pattern = terminal_match.groups()
# token = _convert_terminal(name, pattern)
# if token:
# tokens.append(token)
#
# # Build Lezer grammar
# lezer_output = ["@top Start { scope+ }", ""]
#
# # Add rules
# for rule in lezer_rules:
# lezer_output.append(rule)
#
# lezer_output.append("")
# lezer_output.append("@tokens {")
#
# # Add tokens
# for token in tokens:
# lezer_output.append(f" {token}")
#
# # Add common tokens
# lezer_output.extend([
# ' whitespace { $[ \\t]+ }',
# ' newline { $[\\n\\r] }',
# ' Comment { "#" ![$\\n]* }',
# ])
#
# lezer_output.append("}")
# lezer_output.append("")
# lezer_output.append("@skip { whitespace | Comment }")
#
# return "\n".join(lezer_output)
#
#
# def _convert_rule(name: str, body: str) -> str:
# """Convert a single Lark rule to Lezer format."""
# # Skip internal rules (starting with _)
# if name.startswith("_"):
# return ""
#
# # Convert rule name to PascalCase for Lezer
# lezer_name = _to_pascal_case(name)
#
# # Convert body
# lezer_body = _convert_body(body)
#
# if lezer_body:
# return f"{lezer_name} {{ {lezer_body} }}"
# return ""
#
#
# def _convert_terminal(name: str, pattern: str) -> str:
# """Convert a Lark terminal to Lezer token format."""
# pattern = pattern.strip()
#
# # Handle regex patterns
# if pattern.startswith("/") and pattern.endswith("/"):
# regex = pattern[1:-1]
# # Convert to Lezer regex format
# return f'{name} {{ ${regex}$ }}'
#
# # Handle string literals
# if pattern.startswith('"') or pattern.startswith("'"):
# return f'{name} {{ {pattern} }}'
#
# # Handle alternatives (literal strings separated by |)
# if "|" in pattern:
# alternatives = [alt.strip() for alt in pattern.split("|")]
# if all(alt.startswith('"') or alt.startswith("'") for alt in alternatives):
# return f'{name} {{ {" | ".join(alternatives)} }}'
#
# return ""
#
#
# def _convert_body(body: str) -> str:
# """Convert the body of a Lark rule to Lezer format."""
# # Remove inline transformations (-> name)
# body = re.sub(r"\s*->\s*\w+", "", body)
#
# # Convert alternatives
# parts = []
# for alt in body.split("|"):
# alt = alt.strip()
# if alt:
# converted = _convert_sequence(alt)
# if converted:
# parts.append(converted)
#
# return " | ".join(parts)
#
#
# def _convert_sequence(seq: str) -> str:
# """Convert a sequence of items in a rule."""
# items = []
#
# # Tokenize the sequence
# tokens = re.findall(
# r'"[^"]*"|\'[^\']*\'|/[^/]+/|\([^)]+\)|\[[^\]]+\]|[a-zA-Z_][a-zA-Z0-9_]*|\?|\*|\+',
# seq
# )
#
# for token in tokens:
# if token.startswith('"') or token.startswith("'"):
# # String literal
# items.append(token)
# elif token.startswith("("):
# # Group
# inner = token[1:-1]
# items.append(f"({_convert_body(inner)})")
# elif token.startswith("["):
# # Optional group in Lark
# inner = token[1:-1]
# items.append(f"({_convert_body(inner)})?")
# elif token in ("?", "*", "+"):
# # Quantifiers - attach to previous item
# if items:
# items[-1] = items[-1] + token
# elif token.isupper() or token.startswith("_"):
# # Terminal reference
# items.append(token)
# elif token.islower() or "_" in token:
# # Rule reference - convert to PascalCase
# items.append(_to_pascal_case(token))
#
# return " ".join(items)
#
#
# def _to_pascal_case(name: str) -> str:
# """Convert snake_case to PascalCase."""
# return "".join(word.capitalize() for word in name.split("_"))
#
#
# def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
# """
# Extract completion items from a Lark grammar.
#
# Parses the grammar to find:
# - Keywords (reserved words like if, not, and)
# - Operators (==, !=, contains, etc.)
# - Functions (style, format, etc.)
# - Types (number, date, boolean, etc.)
# - Literals (True, False, etc.)
#
# Args:
# lark_grammar: The Lark grammar string.
#
# Returns:
# Dictionary with completion categories.
# """
# keywords: Set[str] = set()
# operators: Set[str] = set()
# functions: Set[str] = set()
# types: Set[str] = set()
# literals: Set[str] = set()
#
# # Find all quoted strings (potential keywords/operators)
# quoted_strings = re.findall(r'"([^"]+)"', lark_grammar)
#
# # Also look for terminal definitions with string alternatives (e.g., BOOLEAN: "True" | "False")
# terminal_literals = re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar)
# for match in terminal_literals:
# for literal in match:
# if literal:
# quoted_strings.append(literal)
#
# for s in quoted_strings:
# s_lower = s.lower()
#
# # Classify based on pattern
# if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"):
# operators.add(s)
# elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"):
# operators.add(s_lower)
# elif s_lower in ("if", "not", "and", "or"):
# keywords.add(s_lower)
# elif s_lower in ("true", "false"):
# literals.add(s)
# elif s_lower in ("style", "format"):
# functions.add(s_lower)
# elif s_lower in ("column", "row", "cell", "value", "col"):
# keywords.add(s_lower)
# elif s_lower in ("number", "date", "boolean", "text", "enum"):
# types.add(s_lower)
# elif s_lower == "case":
# keywords.add(s_lower)
#
# # Find function-like patterns: word "("
# function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar)
# for func in function_patterns:
# if func.lower() not in ("true", "false"):
# functions.add(func.lower())
#
# # Find type patterns from format_type rule
# type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
# if type_match:
# type_strings = re.findall(r'"(\w+)"', type_match.group(1))
# types.update(t.lower() for t in type_strings)
#
# return {
# "keywords": sorted(keywords),
# "operators": sorted(operators),
# "functions": sorted(functions),
# "types": sorted(types),
# "literals": sorted(literals),
# }

View File

@@ -1,12 +1,14 @@
"""
Utilities for converting Lark grammars to CodeMirror 5 Simple Mode format.
This module provides functions to extract regex patterns from Lark grammar
terminals and generate a CodeMirror Simple Mode configuration for syntax highlighting.
This module provides functions to:
1. Extract regex patterns from Lark grammar terminals
2. Generate CodeMirror Simple Mode configuration for syntax highlighting
3. Extract completion items from Lark grammar (keywords, operators, etc.)
"""
import re
from typing import Dict, List, Any
from typing import Dict, List, Any, Set
def lark_to_simple_mode(lark_grammar: str) -> Dict[str, Any]:
@@ -238,3 +240,85 @@ def generate_formatting_dsl_mode() -> Dict[str, Any]:
{"regex": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", "token": "variable"},
]
}
def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
    """
    Extract completion items from a Lark grammar.

    Scans the grammar text for quoted string literals and classifies each
    one into a completion category:
    - Keywords (reserved words like if, not, and)
    - Operators (==, !=, contains, etc.)
    - Functions (style, format, etc.)
    - Types (number, date, boolean, text, enum)
    - Literals (True, False)

    Args:
        lark_grammar: The Lark grammar string.

    Returns:
        Dictionary with completion categories:
        {
            "keywords": [...],
            "operators": [...],
            "functions": [...],
            "types": [...],
            "literals": [...]
        }
    """
    # Category membership tables. These sets are mutually disjoint, so the
    # classification below is order-independent.
    symbol_operators = ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/")
    word_operators = ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty")
    keyword_words = ("if", "not", "and", "or", "column", "row", "cell", "value", "col", "case")
    function_words = ("style", "format")
    type_words = ("number", "date", "boolean", "text", "enum")

    buckets: Dict[str, Set[str]] = {
        "keywords": set(),
        "operators": set(),
        "functions": set(),
        "types": set(),
        "literals": set(),
    }

    # Every double-quoted string is a candidate keyword/operator/etc.
    candidates = re.findall(r'"([^"]+)"', lark_grammar)

    # Also pick up terminal definitions with string alternatives,
    # e.g. BOOLEAN: "True" | "False".
    for groups in re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar):
        candidates.extend(g for g in groups if g)

    for raw in candidates:
        lowered = raw.lower()
        if raw in symbol_operators:
            # Symbolic operators are matched case-sensitively and kept as-is.
            buckets["operators"].add(raw)
        elif lowered in word_operators:
            buckets["operators"].add(lowered)
        elif lowered in ("true", "false"):
            # Boolean literals keep their original capitalization.
            buckets["literals"].add(raw)
        elif lowered in function_words:
            buckets["functions"].add(lowered)
        elif lowered in keyword_words:
            buckets["keywords"].add(lowered)
        elif lowered in type_words:
            buckets["types"].add(lowered)

    # Function-like patterns: a quoted word immediately followed by "(".
    for func in re.findall(r'"(\w+)"\s*"?\("', lark_grammar):
        if func.lower() not in ("true", "false"):
            buckets["functions"].add(func.lower())

    # Additional type names declared in a format_type rule, if present.
    type_rule = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
    if type_rule:
        buckets["types"].update(t.lower() for t in re.findall(r'"(\w+)"', type_rule.group(1)))

    return {category: sorted(names) for category, names in buckets.items()}

View File

@@ -283,9 +283,11 @@ def _get_column_value_suggestions(
"""Get column value suggestions based on the current scope."""
if not scope.column_name:
return []
try:
values = provider.list_column_values(scope.column_name)
# Use table_name from scope, or empty string as fallback
table_name = scope.table_name or ""
values = provider.list_column_values(table_name, scope.column_name)
suggestions = []
for value in values:
if value is None: