Added syntax colorization. Removed all references to the deprecated lark_to_lezer module.

This commit is contained in:
2026-02-07 11:08:34 +01:00
parent 1c1ced2a9f
commit ab4f251f0c
7 changed files with 106 additions and 461 deletions

View File

@@ -9,10 +9,7 @@ from abc import ABC, abstractmethod
from functools import cached_property
from typing import List, Dict, Any
# TODO: Replace with lark_to_simple_mode when implemented
from myfasthtml.core.dsl.lark_to_lezer import (
extract_completions_from_grammar, # Will be moved to utils.py
)
from myfasthtml.core.dsl.lark_to_simple_mode import extract_completions_from_grammar
from myfasthtml.core.utils import make_safe_id
@@ -82,13 +79,13 @@ class DSLDefinition(ABC):
Returns:
Dictionary with:
- 'lezerGrammar': The Lezer grammar string
- 'simpleModeConfig': The CodeMirror Simple Mode configuration
- 'completions': The completion items
- 'name': The DSL name
"""
return {
"name": self.name,
"lezerGrammar": self.lezer_grammar,
"simpleModeConfig": self.simple_mode_config,
"completions": self.completions,
}

View File

@@ -1,267 +0,0 @@
# """
# DEPRECATED: Utilities for converting Lark grammars to Lezer format.
#
# ⚠️ WARNING: This module is deprecated and will be removed in a future version.
#
# Original purpose:
# - Transform a Lark grammar to a Lezer grammar for CodeMirror 6
# - Extract completion items (keywords, operators, etc.) from a Lark grammar
#
# Deprecation reason:
# - CodeMirror 6 requires a bundler (Webpack, Rollup, etc.)
# - Incompatible with FastHTML's direct script inclusion approach
# - Replaced by CodeMirror 5 Simple Mode (see lark_to_simple_mode.py)
#
# Migration path:
# - Use lark_to_simple_mode.py for CodeMirror 5 syntax highlighting
# - extract_completions_from_grammar() is still used and will be moved to utils.py
# """
#
# import re
# from typing import Dict, List, Set
#
#
# def lark_to_lezer_grammar(lark_grammar: str) -> str:
# """
# Convert a Lark grammar to a Lezer grammar.
#
# This is a simplified converter that handles common Lark patterns.
# Complex grammars may require manual adjustment.
#
# Args:
# lark_grammar: The Lark grammar string.
#
# Returns:
# The Lezer grammar string.
# """
# lines = lark_grammar.strip().split("\n")
# lezer_rules = []
# tokens = []
#
# for line in lines:
# line = line.strip()
#
# # Skip empty lines and comments
# if not line or line.startswith("//") or line.startswith("#"):
# continue
#
# # Skip Lark-specific directives
# if line.startswith("%"):
# continue
#
# # Parse rule definitions (lowercase names only)
# rule_match = re.match(r"^([a-z_][a-z0-9_]*)\s*:\s*(.+)$", line)
# if rule_match:
# name, body = rule_match.groups()
# lezer_rule = _convert_rule(name, body)
# if lezer_rule:
# lezer_rules.append(lezer_rule)
# continue
#
# # Parse terminal definitions (uppercase names)
# terminal_match = re.match(r"^([A-Z_][A-Z0-9_]*)\s*:\s*(.+)$", line)
# if terminal_match:
# name, pattern = terminal_match.groups()
# token = _convert_terminal(name, pattern)
# if token:
# tokens.append(token)
#
# # Build Lezer grammar
# lezer_output = ["@top Start { scope+ }", ""]
#
# # Add rules
# for rule in lezer_rules:
# lezer_output.append(rule)
#
# lezer_output.append("")
# lezer_output.append("@tokens {")
#
# # Add tokens
# for token in tokens:
# lezer_output.append(f" {token}")
#
# # Add common tokens
# lezer_output.extend([
# ' whitespace { $[ \\t]+ }',
# ' newline { $[\\n\\r] }',
# ' Comment { "#" ![$\\n]* }',
# ])
#
# lezer_output.append("}")
# lezer_output.append("")
# lezer_output.append("@skip { whitespace | Comment }")
#
# return "\n".join(lezer_output)
#
#
# def _convert_rule(name: str, body: str) -> str:
# """Convert a single Lark rule to Lezer format."""
# # Skip internal rules (starting with _)
# if name.startswith("_"):
# return ""
#
# # Convert rule name to PascalCase for Lezer
# lezer_name = _to_pascal_case(name)
#
# # Convert body
# lezer_body = _convert_body(body)
#
# if lezer_body:
# return f"{lezer_name} {{ {lezer_body} }}"
# return ""
#
#
# def _convert_terminal(name: str, pattern: str) -> str:
# """Convert a Lark terminal to Lezer token format."""
# pattern = pattern.strip()
#
# # Handle regex patterns
# if pattern.startswith("/") and pattern.endswith("/"):
# regex = pattern[1:-1]
# # Convert to Lezer regex format
# return f'{name} {{ ${regex}$ }}'
#
# # Handle string literals
# if pattern.startswith('"') or pattern.startswith("'"):
# return f'{name} {{ {pattern} }}'
#
# # Handle alternatives (literal strings separated by |)
# if "|" in pattern:
# alternatives = [alt.strip() for alt in pattern.split("|")]
# if all(alt.startswith('"') or alt.startswith("'") for alt in alternatives):
# return f'{name} {{ {" | ".join(alternatives)} }}'
#
# return ""
#
#
# def _convert_body(body: str) -> str:
# """Convert the body of a Lark rule to Lezer format."""
# # Remove inline transformations (-> name)
# body = re.sub(r"\s*->\s*\w+", "", body)
#
# # Convert alternatives
# parts = []
# for alt in body.split("|"):
# alt = alt.strip()
# if alt:
# converted = _convert_sequence(alt)
# if converted:
# parts.append(converted)
#
# return " | ".join(parts)
#
#
# def _convert_sequence(seq: str) -> str:
# """Convert a sequence of items in a rule."""
# items = []
#
# # Tokenize the sequence
# tokens = re.findall(
# r'"[^"]*"|\'[^\']*\'|/[^/]+/|\([^)]+\)|\[[^\]]+\]|[a-zA-Z_][a-zA-Z0-9_]*|\?|\*|\+',
# seq
# )
#
# for token in tokens:
# if token.startswith('"') or token.startswith("'"):
# # String literal
# items.append(token)
# elif token.startswith("("):
# # Group
# inner = token[1:-1]
# items.append(f"({_convert_body(inner)})")
# elif token.startswith("["):
# # Optional group in Lark
# inner = token[1:-1]
# items.append(f"({_convert_body(inner)})?")
# elif token in ("?", "*", "+"):
# # Quantifiers - attach to previous item
# if items:
# items[-1] = items[-1] + token
# elif token.isupper() or token.startswith("_"):
# # Terminal reference
# items.append(token)
# elif token.islower() or "_" in token:
# # Rule reference - convert to PascalCase
# items.append(_to_pascal_case(token))
#
# return " ".join(items)
#
#
# def _to_pascal_case(name: str) -> str:
# """Convert snake_case to PascalCase."""
# return "".join(word.capitalize() for word in name.split("_"))
#
#
# def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
# """
# Extract completion items from a Lark grammar.
#
# Parses the grammar to find:
# - Keywords (reserved words like if, not, and)
# - Operators (==, !=, contains, etc.)
# - Functions (style, format, etc.)
# - Types (number, date, boolean, etc.)
# - Literals (True, False, etc.)
#
# Args:
# lark_grammar: The Lark grammar string.
#
# Returns:
# Dictionary with completion categories.
# """
# keywords: Set[str] = set()
# operators: Set[str] = set()
# functions: Set[str] = set()
# types: Set[str] = set()
# literals: Set[str] = set()
#
# # Find all quoted strings (potential keywords/operators)
# quoted_strings = re.findall(r'"([^"]+)"', lark_grammar)
#
# # Also look for terminal definitions with string alternatives (e.g., BOOLEAN: "True" | "False")
# terminal_literals = re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar)
# for match in terminal_literals:
# for literal in match:
# if literal:
# quoted_strings.append(literal)
#
# for s in quoted_strings:
# s_lower = s.lower()
#
# # Classify based on pattern
# if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"):
# operators.add(s)
# elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"):
# operators.add(s_lower)
# elif s_lower in ("if", "not", "and", "or"):
# keywords.add(s_lower)
# elif s_lower in ("true", "false"):
# literals.add(s)
# elif s_lower in ("style", "format"):
# functions.add(s_lower)
# elif s_lower in ("column", "row", "cell", "value", "col"):
# keywords.add(s_lower)
# elif s_lower in ("number", "date", "boolean", "text", "enum"):
# types.add(s_lower)
# elif s_lower == "case":
# keywords.add(s_lower)
#
# # Find function-like patterns: word "("
# function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar)
# for func in function_patterns:
# if func.lower() not in ("true", "false"):
# functions.add(func.lower())
#
# # Find type patterns from format_type rule
# type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
# if type_match:
# type_strings = re.findall(r'"(\w+)"', type_match.group(1))
# types.update(t.lower() for t in type_strings)
#
# return {
# "keywords": sorted(keywords),
# "operators": sorted(operators),
# "functions": sorted(functions),
# "types": sorted(types),
# "literals": sorted(literals),
# }

View File

@@ -1,12 +1,14 @@
"""
Utilities for converting Lark grammars to CodeMirror 5 Simple Mode format.
This module provides functions to extract regex patterns from Lark grammar
terminals and generate a CodeMirror Simple Mode configuration for syntax highlighting.
This module provides functions to:
1. Extract regex patterns from Lark grammar terminals
2. Generate CodeMirror Simple Mode configuration for syntax highlighting
3. Extract completion items from Lark grammar (keywords, operators, etc.)
"""
import re
from typing import Dict, List, Any
from typing import Dict, List, Any, Set
def lark_to_simple_mode(lark_grammar: str) -> Dict[str, Any]:
@@ -238,3 +240,85 @@ def generate_formatting_dsl_mode() -> Dict[str, Any]:
{"regex": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", "token": "variable"},
]
}
def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
    """
    Extract completion items from a Lark grammar.

    Scans the grammar text for quoted string literals and classifies each
    one into a completion category:
    - Keywords (reserved words like if, not, and)
    - Operators (==, !=, contains, etc.)
    - Functions (style, format, etc.)
    - Types (number, date, boolean, text, enum)
    - Literals (True, False)

    Args:
        lark_grammar: The Lark grammar string.

    Returns:
        Dictionary with completion categories:
        {
            "keywords": [...],
            "operators": [...],
            "functions": [...],
            "types": [...],
            "literals": [...]
        }
    """
    # Category membership tables. These sets are mutually disjoint, so the
    # classification below is order-independent.
    symbol_operators = ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/")
    word_operators = ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty")
    keyword_words = ("if", "not", "and", "or", "column", "row", "cell", "value", "col", "case")
    function_words = ("style", "format")
    type_words = ("number", "date", "boolean", "text", "enum")

    buckets: Dict[str, Set[str]] = {
        "keywords": set(),
        "operators": set(),
        "functions": set(),
        "types": set(),
        "literals": set(),
    }

    # Every double-quoted string is a candidate keyword/operator/etc.
    candidates = re.findall(r'"([^"]+)"', lark_grammar)

    # Also pick up terminal definitions with string alternatives,
    # e.g. BOOLEAN: "True" | "False".
    for groups in re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar):
        candidates.extend(g for g in groups if g)

    for raw in candidates:
        lowered = raw.lower()
        if raw in symbol_operators:
            # Symbolic operators are matched case-sensitively and kept as-is.
            buckets["operators"].add(raw)
        elif lowered in word_operators:
            buckets["operators"].add(lowered)
        elif lowered in ("true", "false"):
            # Boolean literals keep their original capitalization.
            buckets["literals"].add(raw)
        elif lowered in function_words:
            buckets["functions"].add(lowered)
        elif lowered in keyword_words:
            buckets["keywords"].add(lowered)
        elif lowered in type_words:
            buckets["types"].add(lowered)

    # Function-like patterns: a quoted word immediately followed by "(".
    for func in re.findall(r'"(\w+)"\s*"?\("', lark_grammar):
        if func.lower() not in ("true", "false"):
            buckets["functions"].add(func.lower())

    # Additional type names declared in a format_type rule, if present.
    type_rule = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
    if type_rule:
        buckets["types"].update(t.lower() for t in re.findall(r'"(\w+)"', type_rule.group(1)))

    return {category: sorted(names) for category, names in buckets.items()}

View File

@@ -283,9 +283,11 @@ def _get_column_value_suggestions(
"""Get column value suggestions based on the current scope."""
if not scope.column_name:
return []
try:
values = provider.list_column_values(scope.column_name)
# Use table_name from scope, or empty string as fallback
table_name = scope.table_name or ""
values = provider.list_column_values(table_name, scope.column_name)
suggestions = []
for value in values:
if value is None: