Added syntax colorization. Removed all references to the deprecated lark_to_lezer module.
This commit is contained in:
@@ -9,10 +9,7 @@ from abc import ABC, abstractmethod
|
|||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
from typing import List, Dict, Any
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
# TODO: Replace with lark_to_simple_mode when implemented
|
from myfasthtml.core.dsl.lark_to_simple_mode import extract_completions_from_grammar
|
||||||
from myfasthtml.core.dsl.lark_to_lezer import (
|
|
||||||
extract_completions_from_grammar, # Will be moved to utils.py
|
|
||||||
)
|
|
||||||
from myfasthtml.core.utils import make_safe_id
|
from myfasthtml.core.utils import make_safe_id
|
||||||
|
|
||||||
|
|
||||||
@@ -82,13 +79,13 @@ class DSLDefinition(ABC):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary with:
|
Dictionary with:
|
||||||
- 'lezerGrammar': The Lezer grammar string
|
- 'simpleModeConfig': The CodeMirror Simple Mode configuration
|
||||||
- 'completions': The completion items
|
- 'completions': The completion items
|
||||||
- 'name': The DSL name
|
- 'name': The DSL name
|
||||||
"""
|
"""
|
||||||
return {
|
return {
|
||||||
"name": self.name,
|
"name": self.name,
|
||||||
"lezerGrammar": self.lezer_grammar,
|
"simpleModeConfig": self.simple_mode_config,
|
||||||
"completions": self.completions,
|
"completions": self.completions,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,267 +0,0 @@
|
|||||||
# """
|
|
||||||
# DEPRECATED: Utilities for converting Lark grammars to Lezer format.
|
|
||||||
#
|
|
||||||
# ⚠️ WARNING: This module is deprecated and will be removed in a future version.
|
|
||||||
#
|
|
||||||
# Original purpose:
|
|
||||||
# - Transform a Lark grammar to a Lezer grammar for CodeMirror 6
|
|
||||||
# - Extract completion items (keywords, operators, etc.) from a Lark grammar
|
|
||||||
#
|
|
||||||
# Deprecation reason:
|
|
||||||
# - CodeMirror 6 requires a bundler (Webpack, Rollup, etc.)
|
|
||||||
# - Incompatible with FastHTML's direct script inclusion approach
|
|
||||||
# - Replaced by CodeMirror 5 Simple Mode (see lark_to_simple_mode.py)
|
|
||||||
#
|
|
||||||
# Migration path:
|
|
||||||
# - Use lark_to_simple_mode.py for CodeMirror 5 syntax highlighting
|
|
||||||
# - extract_completions_from_grammar() is still used and will be moved to utils.py
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# import re
|
|
||||||
# from typing import Dict, List, Set
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def lark_to_lezer_grammar(lark_grammar: str) -> str:
|
|
||||||
# """
|
|
||||||
# Convert a Lark grammar to a Lezer grammar.
|
|
||||||
#
|
|
||||||
# This is a simplified converter that handles common Lark patterns.
|
|
||||||
# Complex grammars may require manual adjustment.
|
|
||||||
#
|
|
||||||
# Args:
|
|
||||||
# lark_grammar: The Lark grammar string.
|
|
||||||
#
|
|
||||||
# Returns:
|
|
||||||
# The Lezer grammar string.
|
|
||||||
# """
|
|
||||||
# lines = lark_grammar.strip().split("\n")
|
|
||||||
# lezer_rules = []
|
|
||||||
# tokens = []
|
|
||||||
#
|
|
||||||
# for line in lines:
|
|
||||||
# line = line.strip()
|
|
||||||
#
|
|
||||||
# # Skip empty lines and comments
|
|
||||||
# if not line or line.startswith("//") or line.startswith("#"):
|
|
||||||
# continue
|
|
||||||
#
|
|
||||||
# # Skip Lark-specific directives
|
|
||||||
# if line.startswith("%"):
|
|
||||||
# continue
|
|
||||||
#
|
|
||||||
# # Parse rule definitions (lowercase names only)
|
|
||||||
# rule_match = re.match(r"^([a-z_][a-z0-9_]*)\s*:\s*(.+)$", line)
|
|
||||||
# if rule_match:
|
|
||||||
# name, body = rule_match.groups()
|
|
||||||
# lezer_rule = _convert_rule(name, body)
|
|
||||||
# if lezer_rule:
|
|
||||||
# lezer_rules.append(lezer_rule)
|
|
||||||
# continue
|
|
||||||
#
|
|
||||||
# # Parse terminal definitions (uppercase names)
|
|
||||||
# terminal_match = re.match(r"^([A-Z_][A-Z0-9_]*)\s*:\s*(.+)$", line)
|
|
||||||
# if terminal_match:
|
|
||||||
# name, pattern = terminal_match.groups()
|
|
||||||
# token = _convert_terminal(name, pattern)
|
|
||||||
# if token:
|
|
||||||
# tokens.append(token)
|
|
||||||
#
|
|
||||||
# # Build Lezer grammar
|
|
||||||
# lezer_output = ["@top Start { scope+ }", ""]
|
|
||||||
#
|
|
||||||
# # Add rules
|
|
||||||
# for rule in lezer_rules:
|
|
||||||
# lezer_output.append(rule)
|
|
||||||
#
|
|
||||||
# lezer_output.append("")
|
|
||||||
# lezer_output.append("@tokens {")
|
|
||||||
#
|
|
||||||
# # Add tokens
|
|
||||||
# for token in tokens:
|
|
||||||
# lezer_output.append(f" {token}")
|
|
||||||
#
|
|
||||||
# # Add common tokens
|
|
||||||
# lezer_output.extend([
|
|
||||||
# ' whitespace { $[ \\t]+ }',
|
|
||||||
# ' newline { $[\\n\\r] }',
|
|
||||||
# ' Comment { "#" ![$\\n]* }',
|
|
||||||
# ])
|
|
||||||
#
|
|
||||||
# lezer_output.append("}")
|
|
||||||
# lezer_output.append("")
|
|
||||||
# lezer_output.append("@skip { whitespace | Comment }")
|
|
||||||
#
|
|
||||||
# return "\n".join(lezer_output)
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def _convert_rule(name: str, body: str) -> str:
|
|
||||||
# """Convert a single Lark rule to Lezer format."""
|
|
||||||
# # Skip internal rules (starting with _)
|
|
||||||
# if name.startswith("_"):
|
|
||||||
# return ""
|
|
||||||
#
|
|
||||||
# # Convert rule name to PascalCase for Lezer
|
|
||||||
# lezer_name = _to_pascal_case(name)
|
|
||||||
#
|
|
||||||
# # Convert body
|
|
||||||
# lezer_body = _convert_body(body)
|
|
||||||
#
|
|
||||||
# if lezer_body:
|
|
||||||
# return f"{lezer_name} {{ {lezer_body} }}"
|
|
||||||
# return ""
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def _convert_terminal(name: str, pattern: str) -> str:
|
|
||||||
# """Convert a Lark terminal to Lezer token format."""
|
|
||||||
# pattern = pattern.strip()
|
|
||||||
#
|
|
||||||
# # Handle regex patterns
|
|
||||||
# if pattern.startswith("/") and pattern.endswith("/"):
|
|
||||||
# regex = pattern[1:-1]
|
|
||||||
# # Convert to Lezer regex format
|
|
||||||
# return f'{name} {{ ${regex}$ }}'
|
|
||||||
#
|
|
||||||
# # Handle string literals
|
|
||||||
# if pattern.startswith('"') or pattern.startswith("'"):
|
|
||||||
# return f'{name} {{ {pattern} }}'
|
|
||||||
#
|
|
||||||
# # Handle alternatives (literal strings separated by |)
|
|
||||||
# if "|" in pattern:
|
|
||||||
# alternatives = [alt.strip() for alt in pattern.split("|")]
|
|
||||||
# if all(alt.startswith('"') or alt.startswith("'") for alt in alternatives):
|
|
||||||
# return f'{name} {{ {" | ".join(alternatives)} }}'
|
|
||||||
#
|
|
||||||
# return ""
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def _convert_body(body: str) -> str:
|
|
||||||
# """Convert the body of a Lark rule to Lezer format."""
|
|
||||||
# # Remove inline transformations (-> name)
|
|
||||||
# body = re.sub(r"\s*->\s*\w+", "", body)
|
|
||||||
#
|
|
||||||
# # Convert alternatives
|
|
||||||
# parts = []
|
|
||||||
# for alt in body.split("|"):
|
|
||||||
# alt = alt.strip()
|
|
||||||
# if alt:
|
|
||||||
# converted = _convert_sequence(alt)
|
|
||||||
# if converted:
|
|
||||||
# parts.append(converted)
|
|
||||||
#
|
|
||||||
# return " | ".join(parts)
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def _convert_sequence(seq: str) -> str:
|
|
||||||
# """Convert a sequence of items in a rule."""
|
|
||||||
# items = []
|
|
||||||
#
|
|
||||||
# # Tokenize the sequence
|
|
||||||
# tokens = re.findall(
|
|
||||||
# r'"[^"]*"|\'[^\']*\'|/[^/]+/|\([^)]+\)|\[[^\]]+\]|[a-zA-Z_][a-zA-Z0-9_]*|\?|\*|\+',
|
|
||||||
# seq
|
|
||||||
# )
|
|
||||||
#
|
|
||||||
# for token in tokens:
|
|
||||||
# if token.startswith('"') or token.startswith("'"):
|
|
||||||
# # String literal
|
|
||||||
# items.append(token)
|
|
||||||
# elif token.startswith("("):
|
|
||||||
# # Group
|
|
||||||
# inner = token[1:-1]
|
|
||||||
# items.append(f"({_convert_body(inner)})")
|
|
||||||
# elif token.startswith("["):
|
|
||||||
# # Optional group in Lark
|
|
||||||
# inner = token[1:-1]
|
|
||||||
# items.append(f"({_convert_body(inner)})?")
|
|
||||||
# elif token in ("?", "*", "+"):
|
|
||||||
# # Quantifiers - attach to previous item
|
|
||||||
# if items:
|
|
||||||
# items[-1] = items[-1] + token
|
|
||||||
# elif token.isupper() or token.startswith("_"):
|
|
||||||
# # Terminal reference
|
|
||||||
# items.append(token)
|
|
||||||
# elif token.islower() or "_" in token:
|
|
||||||
# # Rule reference - convert to PascalCase
|
|
||||||
# items.append(_to_pascal_case(token))
|
|
||||||
#
|
|
||||||
# return " ".join(items)
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def _to_pascal_case(name: str) -> str:
|
|
||||||
# """Convert snake_case to PascalCase."""
|
|
||||||
# return "".join(word.capitalize() for word in name.split("_"))
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
|
|
||||||
# """
|
|
||||||
# Extract completion items from a Lark grammar.
|
|
||||||
#
|
|
||||||
# Parses the grammar to find:
|
|
||||||
# - Keywords (reserved words like if, not, and)
|
|
||||||
# - Operators (==, !=, contains, etc.)
|
|
||||||
# - Functions (style, format, etc.)
|
|
||||||
# - Types (number, date, boolean, etc.)
|
|
||||||
# - Literals (True, False, etc.)
|
|
||||||
#
|
|
||||||
# Args:
|
|
||||||
# lark_grammar: The Lark grammar string.
|
|
||||||
#
|
|
||||||
# Returns:
|
|
||||||
# Dictionary with completion categories.
|
|
||||||
# """
|
|
||||||
# keywords: Set[str] = set()
|
|
||||||
# operators: Set[str] = set()
|
|
||||||
# functions: Set[str] = set()
|
|
||||||
# types: Set[str] = set()
|
|
||||||
# literals: Set[str] = set()
|
|
||||||
#
|
|
||||||
# # Find all quoted strings (potential keywords/operators)
|
|
||||||
# quoted_strings = re.findall(r'"([^"]+)"', lark_grammar)
|
|
||||||
#
|
|
||||||
# # Also look for terminal definitions with string alternatives (e.g., BOOLEAN: "True" | "False")
|
|
||||||
# terminal_literals = re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar)
|
|
||||||
# for match in terminal_literals:
|
|
||||||
# for literal in match:
|
|
||||||
# if literal:
|
|
||||||
# quoted_strings.append(literal)
|
|
||||||
#
|
|
||||||
# for s in quoted_strings:
|
|
||||||
# s_lower = s.lower()
|
|
||||||
#
|
|
||||||
# # Classify based on pattern
|
|
||||||
# if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"):
|
|
||||||
# operators.add(s)
|
|
||||||
# elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"):
|
|
||||||
# operators.add(s_lower)
|
|
||||||
# elif s_lower in ("if", "not", "and", "or"):
|
|
||||||
# keywords.add(s_lower)
|
|
||||||
# elif s_lower in ("true", "false"):
|
|
||||||
# literals.add(s)
|
|
||||||
# elif s_lower in ("style", "format"):
|
|
||||||
# functions.add(s_lower)
|
|
||||||
# elif s_lower in ("column", "row", "cell", "value", "col"):
|
|
||||||
# keywords.add(s_lower)
|
|
||||||
# elif s_lower in ("number", "date", "boolean", "text", "enum"):
|
|
||||||
# types.add(s_lower)
|
|
||||||
# elif s_lower == "case":
|
|
||||||
# keywords.add(s_lower)
|
|
||||||
#
|
|
||||||
# # Find function-like patterns: word "("
|
|
||||||
# function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar)
|
|
||||||
# for func in function_patterns:
|
|
||||||
# if func.lower() not in ("true", "false"):
|
|
||||||
# functions.add(func.lower())
|
|
||||||
#
|
|
||||||
# # Find type patterns from format_type rule
|
|
||||||
# type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
|
|
||||||
# if type_match:
|
|
||||||
# type_strings = re.findall(r'"(\w+)"', type_match.group(1))
|
|
||||||
# types.update(t.lower() for t in type_strings)
|
|
||||||
#
|
|
||||||
# return {
|
|
||||||
# "keywords": sorted(keywords),
|
|
||||||
# "operators": sorted(operators),
|
|
||||||
# "functions": sorted(functions),
|
|
||||||
# "types": sorted(types),
|
|
||||||
# "literals": sorted(literals),
|
|
||||||
# }
|
|
||||||
@@ -1,12 +1,14 @@
|
|||||||
"""
|
"""
|
||||||
Utilities for converting Lark grammars to CodeMirror 5 Simple Mode format.
|
Utilities for converting Lark grammars to CodeMirror 5 Simple Mode format.
|
||||||
|
|
||||||
This module provides functions to extract regex patterns from Lark grammar
|
This module provides functions to:
|
||||||
terminals and generate a CodeMirror Simple Mode configuration for syntax highlighting.
|
1. Extract regex patterns from Lark grammar terminals
|
||||||
|
2. Generate CodeMirror Simple Mode configuration for syntax highlighting
|
||||||
|
3. Extract completion items from Lark grammar (keywords, operators, etc.)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from typing import Dict, List, Any
|
from typing import Dict, List, Any, Set
|
||||||
|
|
||||||
|
|
||||||
def lark_to_simple_mode(lark_grammar: str) -> Dict[str, Any]:
|
def lark_to_simple_mode(lark_grammar: str) -> Dict[str, Any]:
|
||||||
@@ -238,3 +240,85 @@ def generate_formatting_dsl_mode() -> Dict[str, Any]:
|
|||||||
{"regex": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", "token": "variable"},
|
{"regex": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", "token": "variable"},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
    """
    Extract completion items from a Lark grammar.

    The grammar text is scanned with regular expressions (it is not parsed
    by Lark) and every double-quoted literal is classified against fixed
    word lists:

    - Keywords (if, not, and, or, column, row, cell, value, col, case)
    - Operators (==, !=, contains, startswith, in, between, etc.)
    - Functions (style, format, plus any ``"name" "("`` pattern)
    - Types (number, date, boolean, text, enum, plus every literal of the
      ``format_type`` rule)
    - Literals (True / False, kept in the case used by the grammar)

    Word operators, keywords, functions and types are stored lowercased.

    Args:
        lark_grammar: The Lark grammar string.

    Returns:
        Dictionary with completion categories, each an alphabetically
        sorted list:
            {
                "keywords": [...],
                "operators": [...],
                "functions": [...],
                "types": [...],
                "literals": [...]
            }
    """
    keywords: Set[str] = set()
    operators: Set[str] = set()
    functions: Set[str] = set()
    types: Set[str] = set()
    literals: Set[str] = set()

    # Find all quoted strings (potential keywords/operators).
    quoted_strings = re.findall(r'"([^"]+)"', lark_grammar)

    # Terminal definitions made of string alternatives
    # (e.g. BOOLEAN: "True" | "False") are scanned a second time: a regex
    # terminal containing a double quote (e.g. /"[^"]*"/) shifts the quote
    # pairing of the global scan above, which then misses these literals.
    # The whole definition is matched first and its literals are extracted
    # afterwards, so that every alternative is collected; a repeated capture
    # group would only keep the last repetition.
    terminal_definition = r'[A-Z_]+:\s*"[^"]+"(?:\s*\|\s*"[^"]+")*'
    for terminal in re.finditer(terminal_definition, lark_grammar):
        quoted_strings.extend(re.findall(r'"([^"]+)"', terminal.group(0)))

    symbol_operators = ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/")
    # Case-insensitive word lists paired with the set they feed.
    # The lists are disjoint, so the lookup order does not matter.
    word_categories = (
        (("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"), operators),
        (("if", "not", "and", "or", "column", "row", "cell", "value", "col", "case"), keywords),
        (("style", "format"), functions),
        (("number", "date", "boolean", "text", "enum"), types),
    )

    for s in quoted_strings:
        s_lower = s.lower()

        if s in symbol_operators:
            operators.add(s)
        elif s_lower in ("true", "false"):
            # Literals keep the spelling used by the grammar.
            literals.add(s)
        else:
            for words, bucket in word_categories:
                if s_lower in words:
                    bucket.add(s_lower)
                    break

    # Find function-like patterns: word "("
    for func in re.findall(r'"(\w+)"\s*"?\("', lark_grammar):
        if func.lower() not in ("true", "false"):
            functions.add(func.lower())

    # Every literal of the format_type rule is a type; the rule body ends at
    # the first blank line or at the end of the grammar.
    type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
    if type_match:
        types.update(t.lower() for t in re.findall(r'"(\w+)"', type_match.group(1)))

    return {
        "keywords": sorted(keywords),
        "operators": sorted(operators),
        "functions": sorted(functions),
        "types": sorted(types),
        "literals": sorted(literals),
    }
|
||||||
|
|||||||
@@ -285,7 +285,9 @@ def _get_column_value_suggestions(
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
values = provider.list_column_values(scope.column_name)
|
# Use table_name from scope, or empty string as fallback
|
||||||
|
table_name = scope.table_name or ""
|
||||||
|
values = provider.list_column_values(table_name, scope.column_name)
|
||||||
suggestions = []
|
suggestions = []
|
||||||
for value in values:
|
for value in values:
|
||||||
if value is None:
|
if value is None:
|
||||||
|
|||||||
@@ -1,172 +0,0 @@
|
|||||||
"""Tests for lark_to_lezer module."""
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from myfasthtml.core.dsl.lark_to_lezer import (
|
|
||||||
extract_completions_from_grammar,
|
|
||||||
lark_to_lezer_grammar,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Sample grammars for testing
|
|
||||||
SIMPLE_GRAMMAR = r'''
|
|
||||||
start: rule+
|
|
||||||
rule: "if" condition
|
|
||||||
condition: "value" operator literal
|
|
||||||
operator: "==" -> op_eq
|
|
||||||
| "!=" -> op_ne
|
|
||||||
| "contains" -> op_contains
|
|
||||||
literal: QUOTED_STRING -> string_literal
|
|
||||||
| BOOLEAN -> boolean_literal
|
|
||||||
QUOTED_STRING: /"[^"]*"/
|
|
||||||
BOOLEAN: "True" | "False"
|
|
||||||
'''
|
|
||||||
|
|
||||||
GRAMMAR_WITH_KEYWORDS = r'''
|
|
||||||
start: scope+
|
|
||||||
scope: "column" NAME ":" rule
|
|
||||||
| "row" INTEGER ":" rule
|
|
||||||
| "cell" cell_ref ":" rule
|
|
||||||
rule: style_expr condition?
|
|
||||||
condition: "if" "not"? comparison
|
|
||||||
comparison: operand "and" operand
|
|
||||||
| operand "or" operand
|
|
||||||
style_expr: "style" "(" args ")"
|
|
||||||
operand: "value" | literal
|
|
||||||
'''
|
|
||||||
|
|
||||||
GRAMMAR_WITH_TYPES = r'''
|
|
||||||
format_type: "number" -> fmt_number
|
|
||||||
| "date" -> fmt_date
|
|
||||||
| "boolean" -> fmt_boolean
|
|
||||||
| "text" -> fmt_text
|
|
||||||
| "enum" -> fmt_enum
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractCompletions:
    """Checks on the categories produced by extract_completions_from_grammar."""

    def test_i_can_extract_keywords_from_grammar(self):
        """Reserved words and scope words end up in the keywords category."""
        found = extract_completions_from_grammar(GRAMMAR_WITH_KEYWORDS)["keywords"]

        for expected in ("if", "not", "column", "row", "cell", "value"):
            assert expected in found

    @pytest.mark.parametrize(
        "operator",
        ["==", "!=", "contains"],
    )
    def test_i_can_extract_operators_from_grammar(self, operator):
        """Each operator of the simple grammar is reported as an operator."""
        result = extract_completions_from_grammar(SIMPLE_GRAMMAR)

        assert operator in result["operators"]

    def test_i_can_extract_functions_from_grammar(self):
        """The function-like construct of the grammar is reported as a function."""
        result = extract_completions_from_grammar(GRAMMAR_WITH_KEYWORDS)

        assert "style" in result["functions"]

    @pytest.mark.parametrize(
        "type_name",
        ["number", "date", "boolean", "text", "enum"],
    )
    def test_i_can_extract_types_from_grammar(self, type_name):
        """Every alternative of the format_type rule is reported as a type."""
        result = extract_completions_from_grammar(GRAMMAR_WITH_TYPES)

        assert type_name in result["types"]

    @pytest.mark.parametrize("literal", [
        "True",
        "False"
    ])
    def test_i_can_extract_literals_from_grammar(self, literal):
        """True/False are reported as literals."""
        result = extract_completions_from_grammar(SIMPLE_GRAMMAR)

        assert literal in result["literals"]

    def test_i_can_extract_completions_returns_all_categories(self):
        """The result exposes the five expected category keys."""
        result = extract_completions_from_grammar(SIMPLE_GRAMMAR)

        for category in ("keywords", "operators", "functions", "types", "literals"):
            assert category in result

    def test_i_can_extract_completions_returns_sorted_lists(self):
        """Each category list comes back in sorted order."""
        result = extract_completions_from_grammar(SIMPLE_GRAMMAR)

        for items in result.values():
            assert items == sorted(items)
|
|
||||||
|
|
||||||
|
|
||||||
class TestLarkToLezerConversion:
    """Checks on the text produced by lark_to_lezer_grammar."""

    def test_i_can_convert_simple_grammar_to_lezer(self):
        """The converted grammar contains the @top, @tokens and @skip sections."""
        converted = lark_to_lezer_grammar(SIMPLE_GRAMMAR)

        # Directive, token block and skip directive, in that order
        for section in ("@top Start", "@tokens {", "@skip {"):
            assert section in converted

    def test_i_can_convert_rule_names_to_pascal_case(self):
        """snake_case rule names show up as PascalCase in the output."""
        source = r'''
my_rule: other_rule
other_rule: "test"
'''
        converted = lark_to_lezer_grammar(source)

        assert "MyRule" in converted
        assert "OtherRule" in converted

    def test_i_cannot_include_internal_rules_in_lezer(self):
        """Names starting with _ must not be emitted as Lezer rules."""
        source = r'''
start: rule _NL
rule: "test"
_NL: /\n/
'''
        converted = lark_to_lezer_grammar(source)

        # No rule definition may be generated for the underscore-prefixed name
        assert "Nl {" not in converted

    def test_i_can_convert_terminal_regex_to_lezer(self):
        """A regex terminal is present in the converted grammar."""
        source = r'''
NAME: /[a-zA-Z_][a-zA-Z0-9_]*/
'''
        converted = lark_to_lezer_grammar(source)

        assert "NAME" in converted

    @pytest.mark.parametrize(
        "terminal,pattern",
        [
            ('BOOLEAN: "True" | "False"', "BOOLEAN"),
            ('KEYWORD: "if"', "KEYWORD"),
        ],
    )
    def test_i_can_convert_terminal_strings_to_lezer(self, terminal, pattern):
        """A string-literal terminal is present in the converted grammar."""
        source = f"start: test\n{terminal}"
        converted = lark_to_lezer_grammar(source)

        assert pattern in converted
|
|
||||||
@@ -34,13 +34,13 @@ class MockProvider:
|
|||||||
Provides predefined data for columns, values, and presets.
|
Provides predefined data for columns, values, and presets.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def get_tables(self) -> list[str]:
|
def list_tables(self) -> list[str]:
|
||||||
return ["app.orders"]
|
return ["app.orders"]
|
||||||
|
|
||||||
def get_columns(self, table: str) -> list[str]:
|
def list_columns(self, table: str) -> list[str]:
|
||||||
return ["id", "amount", "status"]
|
return ["id", "amount", "status"]
|
||||||
|
|
||||||
def get_column_values(self, column: str) -> list[Any]:
|
def list_column_values(self, table: str, column: str) -> list[Any]:
|
||||||
if column == "status":
|
if column == "status":
|
||||||
return ["draft", "pending", "approved"]
|
return ["draft", "pending", "approved"]
|
||||||
if column == "amount":
|
if column == "amount":
|
||||||
@@ -50,10 +50,10 @@ class MockProvider:
|
|||||||
def get_row_count(self, table: str) -> int:
|
def get_row_count(self, table: str) -> int:
|
||||||
return 150
|
return 150
|
||||||
|
|
||||||
def get_style_presets(self) -> list[str]:
|
def list_style_presets(self) -> list[str]:
|
||||||
return ["custom_highlight"]
|
return ["custom_highlight"]
|
||||||
|
|
||||||
def get_format_presets(self) -> list[str]:
|
def list_format_presets(self) -> list[str]:
|
||||||
return ["CHF"]
|
return ["CHF"]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -84,14 +84,14 @@ class TestFormattingDSL:
|
|||||||
|
|
||||||
assert completions1 is completions2
|
assert completions1 is completions2
|
||||||
|
|
||||||
def test_i_can_get_lezer_grammar_is_cached(self):
|
def test_i_can_get_simple_mode_config_is_cached(self):
|
||||||
"""Test that lezer_grammar property is cached (same object returned)."""
|
"""Test that simple_mode_config property is cached (same object returned)."""
|
||||||
dsl = FormattingDSL()
|
dsl = FormattingDSL()
|
||||||
|
|
||||||
lezer1 = dsl.lezer_grammar
|
config1 = dsl.simple_mode_config
|
||||||
lezer2 = dsl.lezer_grammar
|
config2 = dsl.simple_mode_config
|
||||||
|
|
||||||
assert lezer1 is lezer2
|
assert config1 is config2
|
||||||
|
|
||||||
def test_i_can_get_editor_config(self):
|
def test_i_can_get_editor_config(self):
|
||||||
"""Test that get_editor_config() returns expected structure."""
|
"""Test that get_editor_config() returns expected structure."""
|
||||||
@@ -100,6 +100,7 @@ class TestFormattingDSL:
|
|||||||
config = dsl.get_editor_config()
|
config = dsl.get_editor_config()
|
||||||
|
|
||||||
assert "name" in config
|
assert "name" in config
|
||||||
assert "lezerGrammar" in config
|
assert "simpleModeConfig" in config
|
||||||
assert "completions" in config
|
assert "completions" in config
|
||||||
assert config["name"] == "Formatting DSL"
|
assert config["name"] == "Formatting DSL"
|
||||||
|
assert "start" in config["simpleModeConfig"] # Simple Mode structure
|
||||||
|
|||||||
Reference in New Issue
Block a user