From ab4f251f0ccf33a289b1bab21ac9881a46054d29 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Sat, 7 Feb 2026 11:08:34 +0100 Subject: [PATCH] Added syntax colorization. Remove all references to deprecated lark_to_lezer module. --- src/myfasthtml/core/dsl/base.py | 9 +- src/myfasthtml/core/dsl/lark_to_lezer.py | 267 ------------------ .../core/dsl/lark_to_simple_mode.py | 90 +++++- .../formatting/dsl/completion/suggestions.py | 6 +- tests/core/dsl/test_lark_to_lezer.py | 172 ----------- tests/core/formatting/dsl/test_completion.py | 10 +- .../test_formatting_dsl_definition.py | 13 +- 7 files changed, 106 insertions(+), 461 deletions(-) delete mode 100644 src/myfasthtml/core/dsl/lark_to_lezer.py delete mode 100644 tests/core/dsl/test_lark_to_lezer.py diff --git a/src/myfasthtml/core/dsl/base.py b/src/myfasthtml/core/dsl/base.py index f18aa1e..6da7ba5 100644 --- a/src/myfasthtml/core/dsl/base.py +++ b/src/myfasthtml/core/dsl/base.py @@ -9,10 +9,7 @@ from abc import ABC, abstractmethod from functools import cached_property from typing import List, Dict, Any -# TODO: Replace with lark_to_simple_mode when implemented -from myfasthtml.core.dsl.lark_to_lezer import ( - extract_completions_from_grammar, # Will be moved to utils.py -) +from myfasthtml.core.dsl.lark_to_simple_mode import extract_completions_from_grammar from myfasthtml.core.utils import make_safe_id @@ -82,13 +79,13 @@ class DSLDefinition(ABC): Returns: Dictionary with: - - 'lezerGrammar': The Lezer grammar string + - 'simpleModeConfig': The CodeMirror Simple Mode configuration - 'completions': The completion items - 'name': The DSL name """ return { "name": self.name, - "lezerGrammar": self.lezer_grammar, + "simpleModeConfig": self.simple_mode_config, "completions": self.completions, } diff --git a/src/myfasthtml/core/dsl/lark_to_lezer.py b/src/myfasthtml/core/dsl/lark_to_lezer.py deleted file mode 100644 index 899a035..0000000 --- a/src/myfasthtml/core/dsl/lark_to_lezer.py +++ /dev/null @@ -1,267 +0,0 @@ -# """ -# DEPRECATED: Utilities for converting Lark grammars to Lezer format. -# -# ⚠️ WARNING: This module is deprecated and will be removed in a future version. -# -# Original purpose: -# - Transform a Lark grammar to a Lezer grammar for CodeMirror 6 -# - Extract completion items (keywords, operators, etc.) from a Lark grammar -# -# Deprecation reason: -# - CodeMirror 6 requires a bundler (Webpack, Rollup, etc.) -# - Incompatible with FastHTML's direct script inclusion approach -# - Replaced by CodeMirror 5 Simple Mode (see lark_to_simple_mode.py) -# -# Migration path: -# - Use lark_to_simple_mode.py for CodeMirror 5 syntax highlighting -# - extract_completions_from_grammar() is still used and will be moved to utils.py -# """ -# -# import re -# from typing import Dict, List, Set -# -# -# def lark_to_lezer_grammar(lark_grammar: str) -> str: -# """ -# Convert a Lark grammar to a Lezer grammar. -# -# This is a simplified converter that handles common Lark patterns. -# Complex grammars may require manual adjustment. -# -# Args: -# lark_grammar: The Lark grammar string. -# -# Returns: -# The Lezer grammar string. -# """ -# lines = lark_grammar.strip().split("\n") -# lezer_rules = [] -# tokens = [] -# -# for line in lines: -# line = line.strip() -# -# # Skip empty lines and comments -# if not line or line.startswith("//") or line.startswith("#"): -# continue -# -# # Skip Lark-specific directives -# if line.startswith("%"): -# continue -# -# # Parse rule definitions (lowercase names only) -# rule_match = re.match(r"^([a-z_][a-z0-9_]*)\s*:\s*(.+)$", line) -# if rule_match: -# name, body = rule_match.groups() -# lezer_rule = _convert_rule(name, body) -# if lezer_rule: -# lezer_rules.append(lezer_rule) -# continue -# -# # Parse terminal definitions (uppercase names) -# terminal_match = re.match(r"^([A-Z_][A-Z0-9_]*)\s*:\s*(.+)$", line) -# if terminal_match: -# name, pattern = terminal_match.groups() -# token = _convert_terminal(name, pattern) -# if token: -# tokens.append(token) -# -# # Build Lezer grammar -# lezer_output = ["@top Start { scope+ }", ""] -# -# # Add rules -# for rule in lezer_rules: -# lezer_output.append(rule) -# -# lezer_output.append("") -# lezer_output.append("@tokens {") -# -# # Add tokens -# for token in tokens: -# lezer_output.append(f" {token}") -# -# # Add common tokens -# lezer_output.extend([ -# ' whitespace { $[ \\t]+ }', -# ' newline { $[\\n\\r] }', -# ' Comment { "#" ![$\\n]* }', -# ]) -# -# lezer_output.append("}") -# lezer_output.append("") -# lezer_output.append("@skip { whitespace | Comment }") -# -# return "\n".join(lezer_output) -# -# -# def _convert_rule(name: str, body: str) -> str: -# """Convert a single Lark rule to Lezer format.""" -# # Skip internal rules (starting with _) -# if name.startswith("_"): -# return "" -# -# # Convert rule name to PascalCase for Lezer -# lezer_name = _to_pascal_case(name) -# -# # Convert body -# lezer_body = _convert_body(body) -# -# if lezer_body: -# return f"{lezer_name} {{ {lezer_body} }}" -# return "" -# -# -# def _convert_terminal(name: str, pattern: str) -> str: -# """Convert a Lark terminal to Lezer token format.""" -# pattern = pattern.strip() -# -# # Handle regex patterns -# if pattern.startswith("/") and pattern.endswith("/"): -# regex = pattern[1:-1] -# # Convert to Lezer regex format -# return f'{name} {{ ${regex}$ }}' -# -# # Handle string literals -# if pattern.startswith('"') or pattern.startswith("'"): -# return f'{name} {{ {pattern} }}' -# -# # Handle alternatives (literal strings separated by |) -# if "|" in pattern: -# alternatives = [alt.strip() for alt in pattern.split("|")] -# if all(alt.startswith('"') or alt.startswith("'") for alt in alternatives): -# return f'{name} {{ {" | ".join(alternatives)} }}' -# -# return "" -# -# -# def _convert_body(body: str) -> str: -# """Convert the body of a Lark rule to Lezer format.""" -# # Remove inline transformations (-> name) -# body = re.sub(r"\s*->\s*\w+", "", body) -# -# # Convert alternatives -# parts = [] -# for alt in body.split("|"): -# alt = alt.strip() -# if alt: -# converted = _convert_sequence(alt) -# if converted: -# parts.append(converted) -# -# return " | ".join(parts) -# -# -# def _convert_sequence(seq: str) -> str: -# """Convert a sequence of items in a rule.""" -# items = [] -# -# # Tokenize the sequence -# tokens = re.findall( -# r'"[^"]*"|\'[^\']*\'|/[^/]+/|\([^)]+\)|\[[^\]]+\]|[a-zA-Z_][a-zA-Z0-9_]*|\?|\*|\+', -# seq -# ) -# -# for token in tokens: -# if token.startswith('"') or token.startswith("'"): -# # String literal -# items.append(token) -# elif token.startswith("("): -# # Group -# inner = token[1:-1] -# items.append(f"({_convert_body(inner)})") -# elif token.startswith("["): -# # Optional group in Lark -# inner = token[1:-1] -# items.append(f"({_convert_body(inner)})?") -# elif token in ("?", "*", "+"): -# # Quantifiers - attach to previous item -# if items: -# items[-1] = items[-1] + token -# elif token.isupper() or token.startswith("_"): -# # Terminal reference -# items.append(token) -# elif token.islower() or "_" in token: -# # Rule reference - convert to PascalCase -# items.append(_to_pascal_case(token)) -# -# return " ".join(items) -# -# -# def _to_pascal_case(name: str) -> str: -# """Convert snake_case to PascalCase.""" -# return "".join(word.capitalize() for word in name.split("_")) -# -# -# def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]: -# """ -# Extract completion items from a Lark grammar. -# -# Parses the grammar to find: -# - Keywords (reserved words like if, not, and) -# - Operators (==, !=, contains, etc.) -# - Functions (style, format, etc.) -# - Types (number, date, boolean, etc.) -# - Literals (True, False, etc.) -# -# Args: -# lark_grammar: The Lark grammar string. -# -# Returns: -# Dictionary with completion categories. -# """ -# keywords: Set[str] = set() -# operators: Set[str] = set() -# functions: Set[str] = set() -# types: Set[str] = set() -# literals: Set[str] = set() -# -# # Find all quoted strings (potential keywords/operators) -# quoted_strings = re.findall(r'"([^"]+)"', lark_grammar) -# -# # Also look for terminal definitions with string alternatives (e.g., BOOLEAN: "True" | "False") -# terminal_literals = re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar) -# for match in terminal_literals: -# for literal in match: -# if literal: -# quoted_strings.append(literal) -# -# for s in quoted_strings: -# s_lower = s.lower() -# -# # Classify based on pattern -# if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"): -# operators.add(s) -# elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"): -# operators.add(s_lower) -# elif s_lower in ("if", "not", "and", "or"): -# keywords.add(s_lower) -# elif s_lower in ("true", "false"): -# literals.add(s) -# elif s_lower in ("style", "format"): -# functions.add(s_lower) -# elif s_lower in ("column", "row", "cell", "value", "col"): -# keywords.add(s_lower) -# elif s_lower in ("number", "date", "boolean", "text", "enum"): -# types.add(s_lower) -# elif s_lower == "case": -# keywords.add(s_lower) -# -# # Find function-like patterns: word "(" -# function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar) -# for func in function_patterns: -# if func.lower() not in ("true", "false"): -# functions.add(func.lower()) -# -# # Find type patterns from format_type rule -# type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL) -# if type_match: -# type_strings = re.findall(r'"(\w+)"', type_match.group(1)) -# types.update(t.lower() for t in type_strings) -# -# return { -# "keywords": sorted(keywords), -# "operators": sorted(operators), -# "functions": sorted(functions), -# "types": sorted(types), -# "literals": sorted(literals), -# } diff --git a/src/myfasthtml/core/dsl/lark_to_simple_mode.py b/src/myfasthtml/core/dsl/lark_to_simple_mode.py index 552ab19..100d4c1 100644 --- a/src/myfasthtml/core/dsl/lark_to_simple_mode.py +++ b/src/myfasthtml/core/dsl/lark_to_simple_mode.py @@ -1,12 +1,14 @@ """ Utilities for converting Lark grammars to CodeMirror 5 Simple Mode format. -This module provides functions to extract regex patterns from Lark grammar -terminals and generate a CodeMirror Simple Mode configuration for syntax highlighting. +This module provides functions to: +1. Extract regex patterns from Lark grammar terminals +2. Generate CodeMirror Simple Mode configuration for syntax highlighting +3. Extract completion items from Lark grammar (keywords, operators, etc.) """ import re -from typing import Dict, List, Any +from typing import Dict, List, Any, Set def lark_to_simple_mode(lark_grammar: str) -> Dict[str, Any]: @@ -238,3 +240,85 @@ def generate_formatting_dsl_mode() -> Dict[str, Any]: {"regex": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", "token": "variable"}, ] } + + +def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]: + """ + Extract completion items from a Lark grammar. + + Parses the grammar to find: + - Keywords (reserved words like if, not, and) + - Operators (==, !=, contains, etc.) + - Functions (style, format, etc.) + - Types (number, date, boolean, etc.) + - Literals (True, False, etc.) + + Args: + lark_grammar: The Lark grammar string. + + Returns: + Dictionary with completion categories: + { + "keywords": [...], + "operators": [...], + "functions": [...], + "types": [...], + "literals": [...] + } + """ + keywords: Set[str] = set() + operators: Set[str] = set() + functions: Set[str] = set() + types: Set[str] = set() + literals: Set[str] = set() + + # Find all quoted strings (potential keywords/operators) + quoted_strings = re.findall(r'"([^"]+)"', lark_grammar) + + # Also look for terminal definitions with string alternatives (e.g., BOOLEAN: "True" | "False") + terminal_literals = re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar) + for match in terminal_literals: + for literal in match: + if literal: + quoted_strings.append(literal) + + for s in quoted_strings: + s_lower = s.lower() + + # Classify based on pattern + if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"): + operators.add(s) + elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"): + operators.add(s_lower) + elif s_lower in ("if", "not", "and", "or"): + keywords.add(s_lower) + elif s_lower in ("true", "false"): + literals.add(s) + elif s_lower in ("style", "format"): + functions.add(s_lower) + elif s_lower in ("column", "row", "cell", "value", "col"): + keywords.add(s_lower) + elif s_lower in ("number", "date", "boolean", "text", "enum"): + types.add(s_lower) + elif s_lower == "case": + keywords.add(s_lower) + + # Find function-like patterns: word "(" + function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar) + for func in function_patterns: + if func.lower() not in ("true", "false"): + functions.add(func.lower()) + + # Find type patterns from format_type rule + type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL) + if type_match: + type_strings = re.findall(r'"(\w+)"', type_match.group(1)) + types.update(t.lower() for t in type_strings) + + return { + "keywords": sorted(keywords), + "operators": sorted(operators), + "functions": sorted(functions), + "types": sorted(types), + "literals": sorted(literals), + } diff --git a/src/myfasthtml/core/formatting/dsl/completion/suggestions.py b/src/myfasthtml/core/formatting/dsl/completion/suggestions.py index b5621f0..eae847d 100644 --- a/src/myfasthtml/core/formatting/dsl/completion/suggestions.py +++ b/src/myfasthtml/core/formatting/dsl/completion/suggestions.py @@ -283,9 +283,11 @@ def _get_column_value_suggestions( """Get column value suggestions based on the current scope.""" if not scope.column_name: return [] - + try: - values = provider.list_column_values(scope.column_name) + # Use table_name from scope, or empty string as fallback + table_name = scope.table_name or "" + values = provider.list_column_values(table_name, scope.column_name) suggestions = [] for value in values: if value is None: diff --git a/tests/core/dsl/test_lark_to_lezer.py b/tests/core/dsl/test_lark_to_lezer.py deleted file mode 100644 index aa1f353..0000000 --- a/tests/core/dsl/test_lark_to_lezer.py +++ /dev/null @@ -1,172 +0,0 @@ -"""Tests for lark_to_lezer module.""" - -import pytest - -from myfasthtml.core.dsl.lark_to_lezer import ( - extract_completions_from_grammar, - lark_to_lezer_grammar, -) - -# Sample grammars for testing -SIMPLE_GRAMMAR = r''' - start: rule+ - rule: "if" condition - condition: "value" operator literal - operator: "==" -> op_eq - | "!=" -> op_ne - | "contains" -> op_contains - literal: QUOTED_STRING -> string_literal - | BOOLEAN -> boolean_literal - QUOTED_STRING: /"[^"]*"/ - BOOLEAN: "True" | "False" -''' - -GRAMMAR_WITH_KEYWORDS = r''' - start: scope+ - scope: "column" NAME ":" rule - | "row" INTEGER ":" rule - | "cell" cell_ref ":" rule - rule: style_expr condition? - condition: "if" "not"? comparison - comparison: operand "and" operand - | operand "or" operand - style_expr: "style" "(" args ")" - operand: "value" | literal -''' - -GRAMMAR_WITH_TYPES = r''' - format_type: "number" -> fmt_number - | "date" -> fmt_date - | "boolean" -> fmt_boolean - | "text" -> fmt_text - | "enum" -> fmt_enum -''' - - -class TestExtractCompletions: - """Tests for extract_completions_from_grammar function.""" - - def test_i_can_extract_keywords_from_grammar(self): - """Test that keywords like if, not, and are extracted.""" - completions = extract_completions_from_grammar(GRAMMAR_WITH_KEYWORDS) - - assert "if" in completions["keywords"] - assert "not" in completions["keywords"] - assert "column" in completions["keywords"] - assert "row" in completions["keywords"] - assert "cell" in completions["keywords"] - assert "value" in completions["keywords"] - - @pytest.mark.parametrize( - "operator", - ["==", "!=", "contains"], - ) - def test_i_can_extract_operators_from_grammar(self, operator): - """Test that operators are extracted from grammar.""" - completions = extract_completions_from_grammar(SIMPLE_GRAMMAR) - - assert operator in completions["operators"] - - def test_i_can_extract_functions_from_grammar(self): - """Test that function-like constructs are extracted.""" - completions = extract_completions_from_grammar(GRAMMAR_WITH_KEYWORDS) - - assert "style" in completions["functions"] - - @pytest.mark.parametrize( - "type_name", - ["number", "date", "boolean", "text", "enum"], - ) - def test_i_can_extract_types_from_grammar(self, type_name): - """Test that type names are extracted from format_type rule.""" - completions = extract_completions_from_grammar(GRAMMAR_WITH_TYPES) - - assert type_name in completions["types"] - - @pytest.mark.parametrize("literal", [ - "True", - "False" - ]) - def test_i_can_extract_literals_from_grammar(self, literal): - """Test that literal values like True/False are extracted.""" - completions = extract_completions_from_grammar(SIMPLE_GRAMMAR) - - assert literal in completions["literals"] - - def test_i_can_extract_completions_returns_all_categories(self): - """Test that all completion categories are present in result.""" - completions = extract_completions_from_grammar(SIMPLE_GRAMMAR) - - assert "keywords" in completions - assert "operators" in completions - assert "functions" in completions - assert "types" in completions - assert "literals" in completions - - def test_i_can_extract_completions_returns_sorted_lists(self): - """Test that completion lists are sorted alphabetically.""" - completions = extract_completions_from_grammar(SIMPLE_GRAMMAR) - - for category in completions.values(): - assert category == sorted(category) - - -class TestLarkToLezerConversion: - """Tests for lark_to_lezer_grammar function.""" - - def test_i_can_convert_simple_grammar_to_lezer(self): - """Test that a simple Lark grammar is converted to Lezer format.""" - lezer = lark_to_lezer_grammar(SIMPLE_GRAMMAR) - - # Should have @top directive - assert "@top Start" in lezer - # Should have @tokens block - assert "@tokens {" in lezer - # Should have @skip directive - assert "@skip {" in lezer - - def test_i_can_convert_rule_names_to_pascal_case(self): - """Test that snake_case rule names become PascalCase.""" - grammar = r''' - my_rule: other_rule - other_rule: "test" - ''' - lezer = lark_to_lezer_grammar(grammar) - - assert "MyRule" in lezer - assert "OtherRule" in lezer - - def test_i_cannot_include_internal_rules_in_lezer(self): - """Test that rules starting with _ are not included.""" - grammar = r''' - start: rule _NL - rule: "test" - _NL: /\n/ - ''' - lezer = lark_to_lezer_grammar(grammar) - - # Internal rules should not appear as Lezer rules - assert "Nl {" not in lezer - - def test_i_can_convert_terminal_regex_to_lezer(self): - """Test that terminal regex patterns are converted.""" - grammar = r''' - NAME: /[a-zA-Z_][a-zA-Z0-9_]*/ - ''' - lezer = lark_to_lezer_grammar(grammar) - - assert "NAME" in lezer - - @pytest.mark.parametrize( - "terminal,pattern", - [ - ('BOOLEAN: "True" | "False"', "BOOLEAN"), - ('KEYWORD: "if"', "KEYWORD"), - ], - ) - def test_i_can_convert_terminal_strings_to_lezer(self, terminal, pattern): - """Test that terminal string literals are converted.""" - grammar = f"start: test\n{terminal}" - lezer = lark_to_lezer_grammar(grammar) - - assert pattern in lezer diff --git a/tests/core/formatting/dsl/test_completion.py b/tests/core/formatting/dsl/test_completion.py index 68de035..b57c41a 100644 --- a/tests/core/formatting/dsl/test_completion.py +++ b/tests/core/formatting/dsl/test_completion.py @@ -34,13 +34,13 @@ class MockProvider: Provides predefined data for columns, values, and presets. """ - def get_tables(self) -> list[str]: + def list_tables(self) -> list[str]: return ["app.orders"] - def get_columns(self, table: str) -> list[str]: + def list_columns(self, table: str) -> list[str]: return ["id", "amount", "status"] - def get_column_values(self, column: str) -> list[Any]: + def list_column_values(self, table: str, column: str) -> list[Any]: if column == "status": return ["draft", "pending", "approved"] if column == "amount": @@ -50,10 +50,10 @@ class MockProvider: def get_row_count(self, table: str) -> int: return 150 - def get_style_presets(self) -> list[str]: + def list_style_presets(self) -> list[str]: return ["custom_highlight"] - def get_format_presets(self) -> list[str]: + def list_format_presets(self) -> list[str]: return ["CHF"] diff --git a/tests/core/formatting/test_formatting_dsl_definition.py b/tests/core/formatting/test_formatting_dsl_definition.py index 3ac5d1f..4213349 100644 --- a/tests/core/formatting/test_formatting_dsl_definition.py +++ b/tests/core/formatting/test_formatting_dsl_definition.py @@ -84,14 +84,14 @@ class TestFormattingDSL: assert completions1 is completions2 - def test_i_can_get_lezer_grammar_is_cached(self): - """Test that lezer_grammar property is cached (same object returned).""" + def test_i_can_get_simple_mode_config_is_cached(self): + """Test that simple_mode_config property is cached (same object returned).""" dsl = FormattingDSL() - lezer1 = dsl.lezer_grammar - lezer2 = dsl.lezer_grammar + config1 = dsl.simple_mode_config + config2 = dsl.simple_mode_config - assert lezer1 is lezer2 + assert config1 is config2 def test_i_can_get_editor_config(self): """Test that get_editor_config() returns expected structure.""" @@ -100,6 +100,7 @@ class TestFormattingDSL: config = dsl.get_editor_config() assert "name" in config - assert "lezerGrammar" in config + assert "simpleModeConfig" in config assert "completions" in config assert config["name"] == "Formatting DSL" + assert "start" in config["simpleModeConfig"] # Simple Mode structure