Add syntax colorization. Remove all references to the deprecated lark_to_lezer module.
This commit is contained in:
@@ -9,10 +9,7 @@ from abc import ABC, abstractmethod
|
||||
from functools import cached_property
|
||||
from typing import List, Dict, Any
|
||||
|
||||
# TODO: Replace with lark_to_simple_mode when implemented
|
||||
from myfasthtml.core.dsl.lark_to_lezer import (
|
||||
extract_completions_from_grammar, # Will be moved to utils.py
|
||||
)
|
||||
from myfasthtml.core.dsl.lark_to_simple_mode import extract_completions_from_grammar
|
||||
from myfasthtml.core.utils import make_safe_id
|
||||
|
||||
|
||||
@@ -82,13 +79,13 @@ class DSLDefinition(ABC):
|
||||
|
||||
Returns:
|
||||
Dictionary with:
|
||||
- 'lezerGrammar': The Lezer grammar string
|
||||
- 'simpleModeConfig': The CodeMirror Simple Mode configuration
|
||||
- 'completions': The completion items
|
||||
- 'name': The DSL name
|
||||
"""
|
||||
return {
|
||||
"name": self.name,
|
||||
"lezerGrammar": self.lezer_grammar,
|
||||
"simpleModeConfig": self.simple_mode_config,
|
||||
"completions": self.completions,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,267 +0,0 @@
|
||||
# """
|
||||
# DEPRECATED: Utilities for converting Lark grammars to Lezer format.
|
||||
#
|
||||
# ⚠️ WARNING: This module is deprecated and will be removed in a future version.
|
||||
#
|
||||
# Original purpose:
|
||||
# - Transform a Lark grammar to a Lezer grammar for CodeMirror 6
|
||||
# - Extract completion items (keywords, operators, etc.) from a Lark grammar
|
||||
#
|
||||
# Deprecation reason:
|
||||
# - CodeMirror 6 requires a bundler (Webpack, Rollup, etc.)
|
||||
# - Incompatible with FastHTML's direct script inclusion approach
|
||||
# - Replaced by CodeMirror 5 Simple Mode (see lark_to_simple_mode.py)
|
||||
#
|
||||
# Migration path:
|
||||
# - Use lark_to_simple_mode.py for CodeMirror 5 syntax highlighting
|
||||
# - extract_completions_from_grammar() is still used and will be moved to utils.py
|
||||
# """
|
||||
#
|
||||
# import re
|
||||
# from typing import Dict, List, Set
|
||||
#
|
||||
#
|
||||
# def lark_to_lezer_grammar(lark_grammar: str) -> str:
|
||||
# """
|
||||
# Convert a Lark grammar to a Lezer grammar.
|
||||
#
|
||||
# This is a simplified converter that handles common Lark patterns.
|
||||
# Complex grammars may require manual adjustment.
|
||||
#
|
||||
# Args:
|
||||
# lark_grammar: The Lark grammar string.
|
||||
#
|
||||
# Returns:
|
||||
# The Lezer grammar string.
|
||||
# """
|
||||
# lines = lark_grammar.strip().split("\n")
|
||||
# lezer_rules = []
|
||||
# tokens = []
|
||||
#
|
||||
# for line in lines:
|
||||
# line = line.strip()
|
||||
#
|
||||
# # Skip empty lines and comments
|
||||
# if not line or line.startswith("//") or line.startswith("#"):
|
||||
# continue
|
||||
#
|
||||
# # Skip Lark-specific directives
|
||||
# if line.startswith("%"):
|
||||
# continue
|
||||
#
|
||||
# # Parse rule definitions (lowercase names only)
|
||||
# rule_match = re.match(r"^([a-z_][a-z0-9_]*)\s*:\s*(.+)$", line)
|
||||
# if rule_match:
|
||||
# name, body = rule_match.groups()
|
||||
# lezer_rule = _convert_rule(name, body)
|
||||
# if lezer_rule:
|
||||
# lezer_rules.append(lezer_rule)
|
||||
# continue
|
||||
#
|
||||
# # Parse terminal definitions (uppercase names)
|
||||
# terminal_match = re.match(r"^([A-Z_][A-Z0-9_]*)\s*:\s*(.+)$", line)
|
||||
# if terminal_match:
|
||||
# name, pattern = terminal_match.groups()
|
||||
# token = _convert_terminal(name, pattern)
|
||||
# if token:
|
||||
# tokens.append(token)
|
||||
#
|
||||
# # Build Lezer grammar
|
||||
# lezer_output = ["@top Start { scope+ }", ""]
|
||||
#
|
||||
# # Add rules
|
||||
# for rule in lezer_rules:
|
||||
# lezer_output.append(rule)
|
||||
#
|
||||
# lezer_output.append("")
|
||||
# lezer_output.append("@tokens {")
|
||||
#
|
||||
# # Add tokens
|
||||
# for token in tokens:
|
||||
# lezer_output.append(f" {token}")
|
||||
#
|
||||
# # Add common tokens
|
||||
# lezer_output.extend([
|
||||
# ' whitespace { $[ \\t]+ }',
|
||||
# ' newline { $[\\n\\r] }',
|
||||
# ' Comment { "#" ![$\\n]* }',
|
||||
# ])
|
||||
#
|
||||
# lezer_output.append("}")
|
||||
# lezer_output.append("")
|
||||
# lezer_output.append("@skip { whitespace | Comment }")
|
||||
#
|
||||
# return "\n".join(lezer_output)
|
||||
#
|
||||
#
|
||||
# def _convert_rule(name: str, body: str) -> str:
|
||||
# """Convert a single Lark rule to Lezer format."""
|
||||
# # Skip internal rules (starting with _)
|
||||
# if name.startswith("_"):
|
||||
# return ""
|
||||
#
|
||||
# # Convert rule name to PascalCase for Lezer
|
||||
# lezer_name = _to_pascal_case(name)
|
||||
#
|
||||
# # Convert body
|
||||
# lezer_body = _convert_body(body)
|
||||
#
|
||||
# if lezer_body:
|
||||
# return f"{lezer_name} {{ {lezer_body} }}"
|
||||
# return ""
|
||||
#
|
||||
#
|
||||
# def _convert_terminal(name: str, pattern: str) -> str:
|
||||
# """Convert a Lark terminal to Lezer token format."""
|
||||
# pattern = pattern.strip()
|
||||
#
|
||||
# # Handle regex patterns
|
||||
# if pattern.startswith("/") and pattern.endswith("/"):
|
||||
# regex = pattern[1:-1]
|
||||
# # Convert to Lezer regex format
|
||||
# return f'{name} {{ ${regex}$ }}'
|
||||
#
|
||||
# # Handle string literals
|
||||
# if pattern.startswith('"') or pattern.startswith("'"):
|
||||
# return f'{name} {{ {pattern} }}'
|
||||
#
|
||||
# # Handle alternatives (literal strings separated by |)
|
||||
# if "|" in pattern:
|
||||
# alternatives = [alt.strip() for alt in pattern.split("|")]
|
||||
# if all(alt.startswith('"') or alt.startswith("'") for alt in alternatives):
|
||||
# return f'{name} {{ {" | ".join(alternatives)} }}'
|
||||
#
|
||||
# return ""
|
||||
#
|
||||
#
|
||||
# def _convert_body(body: str) -> str:
|
||||
# """Convert the body of a Lark rule to Lezer format."""
|
||||
# # Remove inline transformations (-> name)
|
||||
# body = re.sub(r"\s*->\s*\w+", "", body)
|
||||
#
|
||||
# # Convert alternatives
|
||||
# parts = []
|
||||
# for alt in body.split("|"):
|
||||
# alt = alt.strip()
|
||||
# if alt:
|
||||
# converted = _convert_sequence(alt)
|
||||
# if converted:
|
||||
# parts.append(converted)
|
||||
#
|
||||
# return " | ".join(parts)
|
||||
#
|
||||
#
|
||||
# def _convert_sequence(seq: str) -> str:
|
||||
# """Convert a sequence of items in a rule."""
|
||||
# items = []
|
||||
#
|
||||
# # Tokenize the sequence
|
||||
# tokens = re.findall(
|
||||
# r'"[^"]*"|\'[^\']*\'|/[^/]+/|\([^)]+\)|\[[^\]]+\]|[a-zA-Z_][a-zA-Z0-9_]*|\?|\*|\+',
|
||||
# seq
|
||||
# )
|
||||
#
|
||||
# for token in tokens:
|
||||
# if token.startswith('"') or token.startswith("'"):
|
||||
# # String literal
|
||||
# items.append(token)
|
||||
# elif token.startswith("("):
|
||||
# # Group
|
||||
# inner = token[1:-1]
|
||||
# items.append(f"({_convert_body(inner)})")
|
||||
# elif token.startswith("["):
|
||||
# # Optional group in Lark
|
||||
# inner = token[1:-1]
|
||||
# items.append(f"({_convert_body(inner)})?")
|
||||
# elif token in ("?", "*", "+"):
|
||||
# # Quantifiers - attach to previous item
|
||||
# if items:
|
||||
# items[-1] = items[-1] + token
|
||||
# elif token.isupper() or token.startswith("_"):
|
||||
# # Terminal reference
|
||||
# items.append(token)
|
||||
# elif token.islower() or "_" in token:
|
||||
# # Rule reference - convert to PascalCase
|
||||
# items.append(_to_pascal_case(token))
|
||||
#
|
||||
# return " ".join(items)
|
||||
#
|
||||
#
|
||||
# def _to_pascal_case(name: str) -> str:
|
||||
# """Convert snake_case to PascalCase."""
|
||||
# return "".join(word.capitalize() for word in name.split("_"))
|
||||
#
|
||||
#
|
||||
# def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
|
||||
# """
|
||||
# Extract completion items from a Lark grammar.
|
||||
#
|
||||
# Parses the grammar to find:
|
||||
# - Keywords (reserved words like if, not, and)
|
||||
# - Operators (==, !=, contains, etc.)
|
||||
# - Functions (style, format, etc.)
|
||||
# - Types (number, date, boolean, etc.)
|
||||
# - Literals (True, False, etc.)
|
||||
#
|
||||
# Args:
|
||||
# lark_grammar: The Lark grammar string.
|
||||
#
|
||||
# Returns:
|
||||
# Dictionary with completion categories.
|
||||
# """
|
||||
# keywords: Set[str] = set()
|
||||
# operators: Set[str] = set()
|
||||
# functions: Set[str] = set()
|
||||
# types: Set[str] = set()
|
||||
# literals: Set[str] = set()
|
||||
#
|
||||
# # Find all quoted strings (potential keywords/operators)
|
||||
# quoted_strings = re.findall(r'"([^"]+)"', lark_grammar)
|
||||
#
|
||||
# # Also look for terminal definitions with string alternatives (e.g., BOOLEAN: "True" | "False")
|
||||
# terminal_literals = re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar)
|
||||
# for match in terminal_literals:
|
||||
# for literal in match:
|
||||
# if literal:
|
||||
# quoted_strings.append(literal)
|
||||
#
|
||||
# for s in quoted_strings:
|
||||
# s_lower = s.lower()
|
||||
#
|
||||
# # Classify based on pattern
|
||||
# if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"):
|
||||
# operators.add(s)
|
||||
# elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"):
|
||||
# operators.add(s_lower)
|
||||
# elif s_lower in ("if", "not", "and", "or"):
|
||||
# keywords.add(s_lower)
|
||||
# elif s_lower in ("true", "false"):
|
||||
# literals.add(s)
|
||||
# elif s_lower in ("style", "format"):
|
||||
# functions.add(s_lower)
|
||||
# elif s_lower in ("column", "row", "cell", "value", "col"):
|
||||
# keywords.add(s_lower)
|
||||
# elif s_lower in ("number", "date", "boolean", "text", "enum"):
|
||||
# types.add(s_lower)
|
||||
# elif s_lower == "case":
|
||||
# keywords.add(s_lower)
|
||||
#
|
||||
# # Find function-like patterns: word "("
|
||||
# function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar)
|
||||
# for func in function_patterns:
|
||||
# if func.lower() not in ("true", "false"):
|
||||
# functions.add(func.lower())
|
||||
#
|
||||
# # Find type patterns from format_type rule
|
||||
# type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
|
||||
# if type_match:
|
||||
# type_strings = re.findall(r'"(\w+)"', type_match.group(1))
|
||||
# types.update(t.lower() for t in type_strings)
|
||||
#
|
||||
# return {
|
||||
# "keywords": sorted(keywords),
|
||||
# "operators": sorted(operators),
|
||||
# "functions": sorted(functions),
|
||||
# "types": sorted(types),
|
||||
# "literals": sorted(literals),
|
||||
# }
|
||||
@@ -1,12 +1,14 @@
|
||||
"""
|
||||
Utilities for converting Lark grammars to CodeMirror 5 Simple Mode format.
|
||||
|
||||
This module provides functions to extract regex patterns from Lark grammar
|
||||
terminals and generate a CodeMirror Simple Mode configuration for syntax highlighting.
|
||||
This module provides functions to:
|
||||
1. Extract regex patterns from Lark grammar terminals
|
||||
2. Generate CodeMirror Simple Mode configuration for syntax highlighting
|
||||
3. Extract completion items from Lark grammar (keywords, operators, etc.)
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Dict, List, Any
|
||||
from typing import Dict, List, Any, Set
|
||||
|
||||
|
||||
def lark_to_simple_mode(lark_grammar: str) -> Dict[str, Any]:
|
||||
@@ -238,3 +240,85 @@ def generate_formatting_dsl_mode() -> Dict[str, Any]:
|
||||
{"regex": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", "token": "variable"},
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
    """
    Extract completion items from a Lark grammar.

    The grammar is not parsed; it is scanned heuristically for double-quoted
    string literals, which are then sorted into fixed categories:
        - Keywords (if, not, and, or, column, row, cell, value, col, case)
        - Operators (==, !=, contains, startswith, etc.)
        - Functions (style, format, plus any literal followed by "(")
        - Types (number, date, boolean, text, enum, plus every literal
          found in a ``format_type`` rule)
        - Literals (True, False; original casing is preserved)

    Literals that match none of the known words are ignored.

    Args:
        lark_grammar: The Lark grammar string.

    Returns:
        Dictionary with completion categories, each a sorted list:
        {
            "keywords": [...],
            "operators": [...],
            "functions": [...],
            "types": [...],
            "literals": [...]
        }
    """
    # A double-quoted literal may contain backslash escapes (e.g. "\""), so an
    # escaped quote must not be treated as the end of the literal.
    literal = r'"(?:[^"\\]|\\.)+"'
    captured_literal = r'"((?:[^"\\]|\\.)+)"'
    # A terminal definition made only of string alternatives, e.g.
    # BOOLEAN: "True" | "False". The whole run is captured as one group
    # because a repeated capture group would keep only its last repetition.
    terminal_run = r'[A-Z_]+:\s*(' + literal + r'(?:\s*\|\s*' + literal + r')*)'

    symbol_operators = frozenset(
        ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/")
    )
    word_operators = frozenset(
        ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty")
    )
    keyword_words = frozenset(
        ("if", "not", "and", "or", "column", "row", "cell", "value", "col", "case")
    )
    literal_words = frozenset(("true", "false"))
    function_words = frozenset(("style", "format"))
    type_words = frozenset(("number", "date", "boolean", "text", "enum"))

    keywords: Set[str] = set()
    operators: Set[str] = set()
    functions: Set[str] = set()
    types: Set[str] = set()
    literals: Set[str] = set()

    # Find all quoted strings (potential keywords/operators)
    quoted_strings = re.findall(captured_literal, lark_grammar)

    # Re-scan terminal definitions anchored on "NAME:". This recovers their
    # alternatives even when an unbalanced quote earlier in the grammar
    # (e.g. inside a regex terminal) has shifted the pairing of the scan above.
    for run in re.finditer(terminal_run, lark_grammar):
        quoted_strings.extend(re.findall(captured_literal, run.group(1)))

    for s in quoted_strings:
        s_lower = s.lower()

        # Classify based on pattern
        if s in symbol_operators:
            operators.add(s)
        elif s_lower in word_operators:
            operators.add(s_lower)
        elif s_lower in keyword_words:
            keywords.add(s_lower)
        elif s_lower in literal_words:
            # Keep the grammar's own casing (e.g. "True") for literals
            literals.add(s)
        elif s_lower in function_words:
            functions.add(s_lower)
        elif s_lower in type_words:
            types.add(s_lower)

    # Find function-like patterns: a quoted word directly followed by "("
    for func in re.findall(r'"(\w+)"\s*"?\("', lark_grammar):
        if func.lower() not in literal_words:
            functions.add(func.lower())

    # Find type patterns from the format_type rule; its body is taken to run
    # up to the next blank line or the end of the grammar.
    type_match = re.search(
        r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL
    )
    if type_match:
        types.update(t.lower() for t in re.findall(r'"(\w+)"', type_match.group(1)))

    return {
        "keywords": sorted(keywords),
        "operators": sorted(operators),
        "functions": sorted(functions),
        "types": sorted(types),
        "literals": sorted(literals),
    }
|
||||
|
||||
@@ -283,9 +283,11 @@ def _get_column_value_suggestions(
|
||||
"""Get column value suggestions based on the current scope."""
|
||||
if not scope.column_name:
|
||||
return []
|
||||
|
||||
|
||||
try:
|
||||
values = provider.list_column_values(scope.column_name)
|
||||
# Use table_name from scope, or empty string as fallback
|
||||
table_name = scope.table_name or ""
|
||||
values = provider.list_column_values(table_name, scope.column_name)
|
||||
suggestions = []
|
||||
for value in values:
|
||||
if value is None:
|
||||
|
||||
Reference in New Issue
Block a user