diff --git a/src/myfasthtml/core/dsl/base.py b/src/myfasthtml/core/dsl/base.py
index f18aa1e..6da7ba5 100644
--- a/src/myfasthtml/core/dsl/base.py
+++ b/src/myfasthtml/core/dsl/base.py
@@ -9,10 +9,7 @@ from abc import ABC, abstractmethod
from functools import cached_property
from typing import List, Dict, Any
-# TODO: Replace with lark_to_simple_mode when implemented
-from myfasthtml.core.dsl.lark_to_lezer import (
- extract_completions_from_grammar, # Will be moved to utils.py
-)
+from myfasthtml.core.dsl.lark_to_simple_mode import extract_completions_from_grammar
from myfasthtml.core.utils import make_safe_id
@@ -82,13 +79,13 @@ class DSLDefinition(ABC):
Returns:
Dictionary with:
- - 'lezerGrammar': The Lezer grammar string
+ - 'simpleModeConfig': The CodeMirror Simple Mode configuration
- 'completions': The completion items
- 'name': The DSL name
"""
return {
"name": self.name,
- "lezerGrammar": self.lezer_grammar,
+ "simpleModeConfig": self.simple_mode_config,
"completions": self.completions,
}
diff --git a/src/myfasthtml/core/dsl/lark_to_lezer.py b/src/myfasthtml/core/dsl/lark_to_lezer.py
deleted file mode 100644
index 899a035..0000000
--- a/src/myfasthtml/core/dsl/lark_to_lezer.py
+++ /dev/null
@@ -1,267 +0,0 @@
-# """
-# DEPRECATED: Utilities for converting Lark grammars to Lezer format.
-#
-# ⚠️ WARNING: This module is deprecated and will be removed in a future version.
-#
-# Original purpose:
-# - Transform a Lark grammar to a Lezer grammar for CodeMirror 6
-# - Extract completion items (keywords, operators, etc.) from a Lark grammar
-#
-# Deprecation reason:
-# - CodeMirror 6 requires a bundler (Webpack, Rollup, etc.)
-# - Incompatible with FastHTML's direct script inclusion approach
-# - Replaced by CodeMirror 5 Simple Mode (see lark_to_simple_mode.py)
-#
-# Migration path:
-# - Use lark_to_simple_mode.py for CodeMirror 5 syntax highlighting
-# - extract_completions_from_grammar() is still used and will be moved to utils.py
-# """
-#
-# import re
-# from typing import Dict, List, Set
-#
-#
-# def lark_to_lezer_grammar(lark_grammar: str) -> str:
-# """
-# Convert a Lark grammar to a Lezer grammar.
-#
-# This is a simplified converter that handles common Lark patterns.
-# Complex grammars may require manual adjustment.
-#
-# Args:
-# lark_grammar: The Lark grammar string.
-#
-# Returns:
-# The Lezer grammar string.
-# """
-# lines = lark_grammar.strip().split("\n")
-# lezer_rules = []
-# tokens = []
-#
-# for line in lines:
-# line = line.strip()
-#
-# # Skip empty lines and comments
-# if not line or line.startswith("//") or line.startswith("#"):
-# continue
-#
-# # Skip Lark-specific directives
-# if line.startswith("%"):
-# continue
-#
-# # Parse rule definitions (lowercase names only)
-# rule_match = re.match(r"^([a-z_][a-z0-9_]*)\s*:\s*(.+)$", line)
-# if rule_match:
-# name, body = rule_match.groups()
-# lezer_rule = _convert_rule(name, body)
-# if lezer_rule:
-# lezer_rules.append(lezer_rule)
-# continue
-#
-# # Parse terminal definitions (uppercase names)
-# terminal_match = re.match(r"^([A-Z_][A-Z0-9_]*)\s*:\s*(.+)$", line)
-# if terminal_match:
-# name, pattern = terminal_match.groups()
-# token = _convert_terminal(name, pattern)
-# if token:
-# tokens.append(token)
-#
-# # Build Lezer grammar
-# lezer_output = ["@top Start { scope+ }", ""]
-#
-# # Add rules
-# for rule in lezer_rules:
-# lezer_output.append(rule)
-#
-# lezer_output.append("")
-# lezer_output.append("@tokens {")
-#
-# # Add tokens
-# for token in tokens:
-# lezer_output.append(f" {token}")
-#
-# # Add common tokens
-# lezer_output.extend([
-# ' whitespace { $[ \\t]+ }',
-# ' newline { $[\\n\\r] }',
-# ' Comment { "#" ![$\\n]* }',
-# ])
-#
-# lezer_output.append("}")
-# lezer_output.append("")
-# lezer_output.append("@skip { whitespace | Comment }")
-#
-# return "\n".join(lezer_output)
-#
-#
-# def _convert_rule(name: str, body: str) -> str:
-# """Convert a single Lark rule to Lezer format."""
-# # Skip internal rules (starting with _)
-# if name.startswith("_"):
-# return ""
-#
-# # Convert rule name to PascalCase for Lezer
-# lezer_name = _to_pascal_case(name)
-#
-# # Convert body
-# lezer_body = _convert_body(body)
-#
-# if lezer_body:
-# return f"{lezer_name} {{ {lezer_body} }}"
-# return ""
-#
-#
-# def _convert_terminal(name: str, pattern: str) -> str:
-# """Convert a Lark terminal to Lezer token format."""
-# pattern = pattern.strip()
-#
-# # Handle regex patterns
-# if pattern.startswith("/") and pattern.endswith("/"):
-# regex = pattern[1:-1]
-# # Convert to Lezer regex format
-# return f'{name} {{ ${regex}$ }}'
-#
-# # Handle string literals
-# if pattern.startswith('"') or pattern.startswith("'"):
-# return f'{name} {{ {pattern} }}'
-#
-# # Handle alternatives (literal strings separated by |)
-# if "|" in pattern:
-# alternatives = [alt.strip() for alt in pattern.split("|")]
-# if all(alt.startswith('"') or alt.startswith("'") for alt in alternatives):
-# return f'{name} {{ {" | ".join(alternatives)} }}'
-#
-# return ""
-#
-#
-# def _convert_body(body: str) -> str:
-# """Convert the body of a Lark rule to Lezer format."""
-# # Remove inline transformations (-> name)
-# body = re.sub(r"\s*->\s*\w+", "", body)
-#
-# # Convert alternatives
-# parts = []
-# for alt in body.split("|"):
-# alt = alt.strip()
-# if alt:
-# converted = _convert_sequence(alt)
-# if converted:
-# parts.append(converted)
-#
-# return " | ".join(parts)
-#
-#
-# def _convert_sequence(seq: str) -> str:
-# """Convert a sequence of items in a rule."""
-# items = []
-#
-# # Tokenize the sequence
-# tokens = re.findall(
-# r'"[^"]*"|\'[^\']*\'|/[^/]+/|\([^)]+\)|\[[^\]]+\]|[a-zA-Z_][a-zA-Z0-9_]*|\?|\*|\+',
-# seq
-# )
-#
-# for token in tokens:
-# if token.startswith('"') or token.startswith("'"):
-# # String literal
-# items.append(token)
-# elif token.startswith("("):
-# # Group
-# inner = token[1:-1]
-# items.append(f"({_convert_body(inner)})")
-# elif token.startswith("["):
-# # Optional group in Lark
-# inner = token[1:-1]
-# items.append(f"({_convert_body(inner)})?")
-# elif token in ("?", "*", "+"):
-# # Quantifiers - attach to previous item
-# if items:
-# items[-1] = items[-1] + token
-# elif token.isupper() or token.startswith("_"):
-# # Terminal reference
-# items.append(token)
-# elif token.islower() or "_" in token:
-# # Rule reference - convert to PascalCase
-# items.append(_to_pascal_case(token))
-#
-# return " ".join(items)
-#
-#
-# def _to_pascal_case(name: str) -> str:
-# """Convert snake_case to PascalCase."""
-# return "".join(word.capitalize() for word in name.split("_"))
-#
-#
-# def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
-# """
-# Extract completion items from a Lark grammar.
-#
-# Parses the grammar to find:
-# - Keywords (reserved words like if, not, and)
-# - Operators (==, !=, contains, etc.)
-# - Functions (style, format, etc.)
-# - Types (number, date, boolean, etc.)
-# - Literals (True, False, etc.)
-#
-# Args:
-# lark_grammar: The Lark grammar string.
-#
-# Returns:
-# Dictionary with completion categories.
-# """
-# keywords: Set[str] = set()
-# operators: Set[str] = set()
-# functions: Set[str] = set()
-# types: Set[str] = set()
-# literals: Set[str] = set()
-#
-# # Find all quoted strings (potential keywords/operators)
-# quoted_strings = re.findall(r'"([^"]+)"', lark_grammar)
-#
-# # Also look for terminal definitions with string alternatives (e.g., BOOLEAN: "True" | "False")
-# terminal_literals = re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar)
-# for match in terminal_literals:
-# for literal in match:
-# if literal:
-# quoted_strings.append(literal)
-#
-# for s in quoted_strings:
-# s_lower = s.lower()
-#
-# # Classify based on pattern
-# if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"):
-# operators.add(s)
-# elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"):
-# operators.add(s_lower)
-# elif s_lower in ("if", "not", "and", "or"):
-# keywords.add(s_lower)
-# elif s_lower in ("true", "false"):
-# literals.add(s)
-# elif s_lower in ("style", "format"):
-# functions.add(s_lower)
-# elif s_lower in ("column", "row", "cell", "value", "col"):
-# keywords.add(s_lower)
-# elif s_lower in ("number", "date", "boolean", "text", "enum"):
-# types.add(s_lower)
-# elif s_lower == "case":
-# keywords.add(s_lower)
-#
-# # Find function-like patterns: word "("
-# function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar)
-# for func in function_patterns:
-# if func.lower() not in ("true", "false"):
-# functions.add(func.lower())
-#
-# # Find type patterns from format_type rule
-# type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
-# if type_match:
-# type_strings = re.findall(r'"(\w+)"', type_match.group(1))
-# types.update(t.lower() for t in type_strings)
-#
-# return {
-# "keywords": sorted(keywords),
-# "operators": sorted(operators),
-# "functions": sorted(functions),
-# "types": sorted(types),
-# "literals": sorted(literals),
-# }
diff --git a/src/myfasthtml/core/dsl/lark_to_simple_mode.py b/src/myfasthtml/core/dsl/lark_to_simple_mode.py
index 552ab19..100d4c1 100644
--- a/src/myfasthtml/core/dsl/lark_to_simple_mode.py
+++ b/src/myfasthtml/core/dsl/lark_to_simple_mode.py
@@ -1,12 +1,14 @@
"""
Utilities for converting Lark grammars to CodeMirror 5 Simple Mode format.
-This module provides functions to extract regex patterns from Lark grammar
-terminals and generate a CodeMirror Simple Mode configuration for syntax highlighting.
+This module provides functions to:
+1. Extract regex patterns from Lark grammar terminals
+2. Generate CodeMirror Simple Mode configuration for syntax highlighting
+3. Extract completion items from Lark grammar (keywords, operators, etc.)
"""
import re
-from typing import Dict, List, Any
+from typing import Dict, List, Any, Set
def lark_to_simple_mode(lark_grammar: str) -> Dict[str, Any]:
@@ -238,3 +240,85 @@ def generate_formatting_dsl_mode() -> Dict[str, Any]:
{"regex": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", "token": "variable"},
]
}
+
+
+def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
+ """
+ Extract completion items from a Lark grammar.
+
+ Parses the grammar to find:
+ - Keywords (reserved words like if, not, and)
+ - Operators (==, !=, contains, etc.)
+ - Functions (style, format, etc.)
+ - Types (number, date, boolean, etc.)
+ - Literals (True, False, etc.)
+
+ Args:
+ lark_grammar: The Lark grammar string.
+
+ Returns:
+ Dictionary with completion categories:
+ {
+ "keywords": [...],
+ "operators": [...],
+ "functions": [...],
+ "types": [...],
+ "literals": [...]
+ }
+ """
+ keywords: Set[str] = set()
+ operators: Set[str] = set()
+ functions: Set[str] = set()
+ types: Set[str] = set()
+ literals: Set[str] = set()
+
+ # Find all quoted strings (potential keywords/operators)
+ quoted_strings = re.findall(r'"([^"]+)"', lark_grammar)
+
+    # Also look for terminal definitions with string alternatives (e.g., BOOLEAN: "True" | "False"); NOTE: re.findall keeps only the LAST repetition of the (?:...)* group, so middle alternatives in 3+-way terminals are missed
+ terminal_literals = re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar)
+ for match in terminal_literals:
+ for literal in match:
+ if literal:
+ quoted_strings.append(literal)
+
+ for s in quoted_strings:
+ s_lower = s.lower()
+
+ # Classify based on pattern
+ if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"):
+ operators.add(s)
+ elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"):
+ operators.add(s_lower)
+ elif s_lower in ("if", "not", "and", "or"):
+ keywords.add(s_lower)
+ elif s_lower in ("true", "false"):
+ literals.add(s)
+ elif s_lower in ("style", "format"):
+ functions.add(s_lower)
+ elif s_lower in ("column", "row", "cell", "value", "col"):
+ keywords.add(s_lower)
+ elif s_lower in ("number", "date", "boolean", "text", "enum"):
+ types.add(s_lower)
+ elif s_lower == "case":
+ keywords.add(s_lower)
+
+    # Find function-like patterns: a quoted word followed by a quoted open paren, e.g. "style" "(" in a Lark rule
+ function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar)
+ for func in function_patterns:
+ if func.lower() not in ("true", "false"):
+ functions.add(func.lower())
+
+ # Find type patterns from format_type rule
+ type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
+ if type_match:
+ type_strings = re.findall(r'"(\w+)"', type_match.group(1))
+ types.update(t.lower() for t in type_strings)
+
+ return {
+ "keywords": sorted(keywords),
+ "operators": sorted(operators),
+ "functions": sorted(functions),
+ "types": sorted(types),
+ "literals": sorted(literals),
+ }
diff --git a/src/myfasthtml/core/formatting/dsl/completion/suggestions.py b/src/myfasthtml/core/formatting/dsl/completion/suggestions.py
index b5621f0..eae847d 100644
--- a/src/myfasthtml/core/formatting/dsl/completion/suggestions.py
+++ b/src/myfasthtml/core/formatting/dsl/completion/suggestions.py
@@ -283,9 +283,11 @@ def _get_column_value_suggestions(
"""Get column value suggestions based on the current scope."""
if not scope.column_name:
return []
-
+
try:
- values = provider.list_column_values(scope.column_name)
+            # Use table_name from scope; fall back to "" — assumes the provider tolerates an empty table name (TODO confirm)
+ table_name = scope.table_name or ""
+ values = provider.list_column_values(table_name, scope.column_name)
suggestions = []
for value in values:
if value is None:
diff --git a/tests/core/dsl/test_lark_to_lezer.py b/tests/core/dsl/test_lark_to_lezer.py
deleted file mode 100644
index aa1f353..0000000
--- a/tests/core/dsl/test_lark_to_lezer.py
+++ /dev/null
@@ -1,172 +0,0 @@
-"""Tests for lark_to_lezer module."""
-
-import pytest
-
-from myfasthtml.core.dsl.lark_to_lezer import (
- extract_completions_from_grammar,
- lark_to_lezer_grammar,
-)
-
-# Sample grammars for testing
-SIMPLE_GRAMMAR = r'''
- start: rule+
- rule: "if" condition
- condition: "value" operator literal
- operator: "==" -> op_eq
- | "!=" -> op_ne
- | "contains" -> op_contains
- literal: QUOTED_STRING -> string_literal
- | BOOLEAN -> boolean_literal
- QUOTED_STRING: /"[^"]*"/
- BOOLEAN: "True" | "False"
-'''
-
-GRAMMAR_WITH_KEYWORDS = r'''
- start: scope+
- scope: "column" NAME ":" rule
- | "row" INTEGER ":" rule
- | "cell" cell_ref ":" rule
- rule: style_expr condition?
- condition: "if" "not"? comparison
- comparison: operand "and" operand
- | operand "or" operand
- style_expr: "style" "(" args ")"
- operand: "value" | literal
-'''
-
-GRAMMAR_WITH_TYPES = r'''
- format_type: "number" -> fmt_number
- | "date" -> fmt_date
- | "boolean" -> fmt_boolean
- | "text" -> fmt_text
- | "enum" -> fmt_enum
-'''
-
-
-class TestExtractCompletions:
- """Tests for extract_completions_from_grammar function."""
-
- def test_i_can_extract_keywords_from_grammar(self):
- """Test that keywords like if, not, and are extracted."""
- completions = extract_completions_from_grammar(GRAMMAR_WITH_KEYWORDS)
-
- assert "if" in completions["keywords"]
- assert "not" in completions["keywords"]
- assert "column" in completions["keywords"]
- assert "row" in completions["keywords"]
- assert "cell" in completions["keywords"]
- assert "value" in completions["keywords"]
-
- @pytest.mark.parametrize(
- "operator",
- ["==", "!=", "contains"],
- )
- def test_i_can_extract_operators_from_grammar(self, operator):
- """Test that operators are extracted from grammar."""
- completions = extract_completions_from_grammar(SIMPLE_GRAMMAR)
-
- assert operator in completions["operators"]
-
- def test_i_can_extract_functions_from_grammar(self):
- """Test that function-like constructs are extracted."""
- completions = extract_completions_from_grammar(GRAMMAR_WITH_KEYWORDS)
-
- assert "style" in completions["functions"]
-
- @pytest.mark.parametrize(
- "type_name",
- ["number", "date", "boolean", "text", "enum"],
- )
- def test_i_can_extract_types_from_grammar(self, type_name):
- """Test that type names are extracted from format_type rule."""
- completions = extract_completions_from_grammar(GRAMMAR_WITH_TYPES)
-
- assert type_name in completions["types"]
-
- @pytest.mark.parametrize("literal", [
- "True",
- "False"
- ])
- def test_i_can_extract_literals_from_grammar(self, literal):
- """Test that literal values like True/False are extracted."""
- completions = extract_completions_from_grammar(SIMPLE_GRAMMAR)
-
- assert literal in completions["literals"]
-
- def test_i_can_extract_completions_returns_all_categories(self):
- """Test that all completion categories are present in result."""
- completions = extract_completions_from_grammar(SIMPLE_GRAMMAR)
-
- assert "keywords" in completions
- assert "operators" in completions
- assert "functions" in completions
- assert "types" in completions
- assert "literals" in completions
-
- def test_i_can_extract_completions_returns_sorted_lists(self):
- """Test that completion lists are sorted alphabetically."""
- completions = extract_completions_from_grammar(SIMPLE_GRAMMAR)
-
- for category in completions.values():
- assert category == sorted(category)
-
-
-class TestLarkToLezerConversion:
- """Tests for lark_to_lezer_grammar function."""
-
- def test_i_can_convert_simple_grammar_to_lezer(self):
- """Test that a simple Lark grammar is converted to Lezer format."""
- lezer = lark_to_lezer_grammar(SIMPLE_GRAMMAR)
-
- # Should have @top directive
- assert "@top Start" in lezer
- # Should have @tokens block
- assert "@tokens {" in lezer
- # Should have @skip directive
- assert "@skip {" in lezer
-
- def test_i_can_convert_rule_names_to_pascal_case(self):
- """Test that snake_case rule names become PascalCase."""
- grammar = r'''
- my_rule: other_rule
- other_rule: "test"
- '''
- lezer = lark_to_lezer_grammar(grammar)
-
- assert "MyRule" in lezer
- assert "OtherRule" in lezer
-
- def test_i_cannot_include_internal_rules_in_lezer(self):
- """Test that rules starting with _ are not included."""
- grammar = r'''
- start: rule _NL
- rule: "test"
- _NL: /\n/
- '''
- lezer = lark_to_lezer_grammar(grammar)
-
- # Internal rules should not appear as Lezer rules
- assert "Nl {" not in lezer
-
- def test_i_can_convert_terminal_regex_to_lezer(self):
- """Test that terminal regex patterns are converted."""
- grammar = r'''
- NAME: /[a-zA-Z_][a-zA-Z0-9_]*/
- '''
- lezer = lark_to_lezer_grammar(grammar)
-
- assert "NAME" in lezer
-
- @pytest.mark.parametrize(
- "terminal,pattern",
- [
- ('BOOLEAN: "True" | "False"', "BOOLEAN"),
- ('KEYWORD: "if"', "KEYWORD"),
- ],
- )
- def test_i_can_convert_terminal_strings_to_lezer(self, terminal, pattern):
- """Test that terminal string literals are converted."""
- grammar = f"start: test\n{terminal}"
- lezer = lark_to_lezer_grammar(grammar)
-
- assert pattern in lezer
diff --git a/tests/core/formatting/dsl/test_completion.py b/tests/core/formatting/dsl/test_completion.py
index 68de035..b57c41a 100644
--- a/tests/core/formatting/dsl/test_completion.py
+++ b/tests/core/formatting/dsl/test_completion.py
@@ -34,13 +34,13 @@ class MockProvider:
Provides predefined data for columns, values, and presets.
"""
- def get_tables(self) -> list[str]:
+ def list_tables(self) -> list[str]:
return ["app.orders"]
- def get_columns(self, table: str) -> list[str]:
+ def list_columns(self, table: str) -> list[str]:
return ["id", "amount", "status"]
- def get_column_values(self, column: str) -> list[Any]:
+ def list_column_values(self, table: str, column: str) -> list[Any]:
if column == "status":
return ["draft", "pending", "approved"]
if column == "amount":
@@ -50,10 +50,10 @@ class MockProvider:
def get_row_count(self, table: str) -> int:
return 150
- def get_style_presets(self) -> list[str]:
+ def list_style_presets(self) -> list[str]:
return ["custom_highlight"]
- def get_format_presets(self) -> list[str]:
+ def list_format_presets(self) -> list[str]:
return ["CHF"]
diff --git a/tests/core/formatting/test_formatting_dsl_definition.py b/tests/core/formatting/test_formatting_dsl_definition.py
index 3ac5d1f..4213349 100644
--- a/tests/core/formatting/test_formatting_dsl_definition.py
+++ b/tests/core/formatting/test_formatting_dsl_definition.py
@@ -84,14 +84,14 @@ class TestFormattingDSL:
assert completions1 is completions2
- def test_i_can_get_lezer_grammar_is_cached(self):
- """Test that lezer_grammar property is cached (same object returned)."""
+ def test_i_can_get_simple_mode_config_is_cached(self):
+ """Test that simple_mode_config property is cached (same object returned)."""
dsl = FormattingDSL()
- lezer1 = dsl.lezer_grammar
- lezer2 = dsl.lezer_grammar
+ config1 = dsl.simple_mode_config
+ config2 = dsl.simple_mode_config
- assert lezer1 is lezer2
+ assert config1 is config2
def test_i_can_get_editor_config(self):
"""Test that get_editor_config() returns expected structure."""
@@ -100,6 +100,7 @@ class TestFormattingDSL:
config = dsl.get_editor_config()
assert "name" in config
- assert "lezerGrammar" in config
+ assert "simpleModeConfig" in config
assert "completions" in config
assert config["name"] == "Formatting DSL"
+ assert "start" in config["simpleModeConfig"] # Simple Mode structure