Added syntax colorization

This commit is contained in:
2026-02-07 10:52:40 +01:00
parent db1e94f930
commit 1c1ced2a9f
13 changed files with 1049 additions and 330 deletions

View File

@@ -9,9 +9,9 @@ from abc import ABC, abstractmethod
from functools import cached_property
from typing import List, Dict, Any
# TODO: Replace with lark_to_simple_mode when implemented
from myfasthtml.core.dsl.lark_to_lezer import (
lark_to_lezer_grammar,
extract_completions_from_grammar,
extract_completions_from_grammar, # Will be moved to utils.py
)
from myfasthtml.core.utils import make_safe_id
@@ -39,18 +39,6 @@ class DSLDefinition(ABC):
"""
pass
@cached_property
def lezer_grammar(self) -> str:
    """
    Lezer translation of this DSL's Lark grammar.

    The grammar text is obtained from ``get_grammar()`` and handed to
    ``lark_to_lezer_grammar``. Because this is a ``cached_property``, the
    conversion runs on first access and the result is reused afterwards.

    Returns:
        The Lezer grammar as a string.
    """
    lark_source = self.get_grammar()
    return lark_to_lezer_grammar(lark_source)
@cached_property
def completions(self) -> Dict[str, List[str]]:
"""
@@ -68,6 +56,26 @@ class DSLDefinition(ABC):
"""
return extract_completions_from_grammar(self.get_grammar())
@cached_property
def simple_mode_config(self) -> Dict[str, Any]:
    """
    CodeMirror 5 Simple Mode configuration used for syntax highlighting.

    Built from the grammar returned by ``get_grammar()``. Because this is a
    ``cached_property``, the configuration is computed on first access and
    reused afterwards.

    Returns:
        Dictionary with Simple Mode rules:
        {
            "start": [
                {"regex": "...", "token": "keyword"},
                {"regex": "...", "token": "string"},
                ...
            ]
        }
    """
    # The converter is imported when the property is first evaluated.
    from myfasthtml.core.dsl.lark_to_simple_mode import lark_to_simple_mode

    grammar_text = self.get_grammar()
    return lark_to_simple_mode(grammar_text)
def get_editor_config(self) -> Dict[str, Any]:
"""
Return the configuration for the DslEditor JavaScript initialization.

View File

@@ -1,256 +1,267 @@
"""
Utilities for converting Lark grammars to Lezer format and extracting completions.
This module provides functions to:
1. Transform a Lark grammar to a Lezer grammar for CodeMirror
2. Extract completion items (keywords, operators, etc.) from a Lark grammar
"""
import re
from typing import Dict, List, Set
def lark_to_lezer_grammar(lark_grammar: str) -> str:
    """
    Convert a Lark grammar to a Lezer grammar.

    This is a simplified converter that handles common Lark patterns.
    Complex grammars may require manual adjustment.

    Blank lines, comment lines (``//`` or ``#``) and ``%`` directives are
    ignored. A definition may span several physical lines: a line that
    starts with ``|``, or that follows a line ending in ``:`` or ``|``, is
    folded into the definition before it. Rule modifiers (``?`` / ``!``)
    and numeric priorities (``name.2``) are accepted and discarded.

    Args:
        lark_grammar: The Lark grammar string.

    Returns:
        The Lezer grammar string: a ``@top`` declaration, the converted
        rules, a ``@tokens`` block and a ``@skip`` declaration.
    """
    # Fold physical lines into logical definitions so that alternatives
    # written on continuation lines are not lost.
    definitions: List[str] = []
    for raw_line in lark_grammar.strip().split("\n"):
        line = raw_line.strip()

        # Skip empty lines, comments and Lark-specific directives
        if not line or line.startswith(("//", "#", "%")):
            continue

        continues_previous = bool(definitions) and (
            line.startswith("|") or definitions[-1].endswith((":", "|"))
        )
        if continues_previous:
            definitions[-1] = f"{definitions[-1]} {line}"
        else:
            definitions.append(line)

    # Rules have lowercase names, optionally prefixed by ? or ! and
    # optionally followed by a priority; terminals have uppercase names.
    rule_re = re.compile(r"^[?!]?([a-z_][a-z0-9_]*)(?:\.\d+)?\s*:\s*(.+)$")
    terminal_re = re.compile(r"^([A-Z_][A-Z0-9_]*)(?:\.\d+)?\s*:\s*(.+)$")

    lezer_rules = []
    tokens = []
    for definition in definitions:
        rule_match = rule_re.match(definition)
        if rule_match:
            lezer_rule = _convert_rule(*rule_match.groups())
            if lezer_rule:
                lezer_rules.append(lezer_rule)
            continue

        terminal_match = terminal_re.match(definition)
        if terminal_match:
            token = _convert_terminal(*terminal_match.groups())
            if token:
                tokens.append(token)

    # Build Lezer grammar. Converted rules are always emitted with
    # PascalCase names, so the top rule must reference "Scope", not "scope".
    lezer_output = ["@top Start { Scope+ }", ""]
    lezer_output.extend(lezer_rules)
    lezer_output.extend(["", "@tokens {"])
    lezer_output.extend(f" {token}" for token in tokens)

    # Common tokens appended to every generated grammar
    lezer_output.extend([
        ' whitespace { $[ \\t]+ }',
        ' newline { $[\\n\\r] }',
        ' Comment { "#" ![$\\n]* }',
    ])
    lezer_output.extend(["}", "", "@skip { whitespace | Comment }"])

    return "\n".join(lezer_output)
def _convert_rule(name: str, body: str) -> str:
"""Convert a single Lark rule to Lezer format."""
# Skip internal rules (starting with _)
if name.startswith("_"):
return ""
# Convert rule name to PascalCase for Lezer
lezer_name = _to_pascal_case(name)
# Convert body
lezer_body = _convert_body(body)
if lezer_body:
return f"{lezer_name} {{ {lezer_body} }}"
return ""
def _convert_terminal(name: str, pattern: str) -> str:
"""Convert a Lark terminal to Lezer token format."""
pattern = pattern.strip()
# Handle regex patterns
if pattern.startswith("/") and pattern.endswith("/"):
regex = pattern[1:-1]
# Convert to Lezer regex format
return f'{name} {{ ${regex}$ }}'
# Handle string literals
if pattern.startswith('"') or pattern.startswith("'"):
return f'{name} {{ {pattern} }}'
# Handle alternatives (literal strings separated by |)
if "|" in pattern:
alternatives = [alt.strip() for alt in pattern.split("|")]
if all(alt.startswith('"') or alt.startswith("'") for alt in alternatives):
return f'{name} {{ {" | ".join(alternatives)} }}'
return ""
def _convert_body(body: str) -> str:
"""Convert the body of a Lark rule to Lezer format."""
# Remove inline transformations (-> name)
body = re.sub(r"\s*->\s*\w+", "", body)
# Convert alternatives
parts = []
for alt in body.split("|"):
alt = alt.strip()
if alt:
converted = _convert_sequence(alt)
if converted:
parts.append(converted)
return " | ".join(parts)
def _convert_sequence(seq: str) -> str:
"""Convert a sequence of items in a rule."""
items = []
# Tokenize the sequence
tokens = re.findall(
r'"[^"]*"|\'[^\']*\'|/[^/]+/|\([^)]+\)|\[[^\]]+\]|[a-zA-Z_][a-zA-Z0-9_]*|\?|\*|\+',
seq
)
for token in tokens:
if token.startswith('"') or token.startswith("'"):
# String literal
items.append(token)
elif token.startswith("("):
# Group
inner = token[1:-1]
items.append(f"({_convert_body(inner)})")
elif token.startswith("["):
# Optional group in Lark
inner = token[1:-1]
items.append(f"({_convert_body(inner)})?")
elif token in ("?", "*", "+"):
# Quantifiers - attach to previous item
if items:
items[-1] = items[-1] + token
elif token.isupper() or token.startswith("_"):
# Terminal reference
items.append(token)
elif token.islower() or "_" in token:
# Rule reference - convert to PascalCase
items.append(_to_pascal_case(token))
return " ".join(items)
def _to_pascal_case(name: str) -> str:
"""Convert snake_case to PascalCase."""
return "".join(word.capitalize() for word in name.split("_"))
def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
    """
    Extract completion items from a Lark grammar.

    Parses the grammar to find:
    - Keywords (reserved words like if, not, and)
    - Operators (==, !=, contains, etc.)
    - Functions (style, format, etc.)
    - Types (number, date, boolean, etc.)
    - Literals (True, False, etc.)

    Only double-quoted string literals are considered. ``//`` comments and
    ``/regex/`` terminals are skipped while scanning, so quote characters
    inside them cannot shift the pairing of the real string literals.

    Args:
        lark_grammar: The Lark grammar string.

    Returns:
        Dictionary with the keys ``keywords``, ``operators``, ``functions``,
        ``types`` and ``literals``, each mapping to a sorted list of strings.
    """
    # One left-to-right scan; at each position the first alternative that
    # matches wins, so a quote inside a regex or comment is never seen as
    # the start of a string literal.
    token_re = re.compile(
        r'"((?:[^"\\\n]|\\.)*)"'        # group 1: body of a string literal
        r"|//[^\n]*"                    # line comment
        r"|/(?:[^/\\\n]|\\.)+/[a-z]*"   # regex terminal with optional flags
    )
    quoted_strings = [
        match.group(1)
        for match in token_re.finditer(lark_grammar)
        if match.group(1) is not None
    ]
    # Grammar text with comments and regex terminals removed; string
    # literals are kept so the structural searches below still work.
    cleaned = token_re.sub(
        lambda match: match.group(0) if match.group(1) is not None else "",
        lark_grammar,
    )

    symbol_operators = {"==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"}
    word_operators = {
        "contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty",
    }
    keyword_words = {
        "if", "not", "and", "or", "column", "row", "cell", "value", "col", "case",
    }
    literal_words = {"true", "false"}
    function_words = {"style", "format"}
    type_words = {"number", "date", "boolean", "text", "enum"}

    keywords: Set[str] = set()
    operators: Set[str] = set()
    functions: Set[str] = set()
    types: Set[str] = set()
    literals: Set[str] = set()

    for s in quoted_strings:
        s_lower = s.lower()
        if s in symbol_operators:
            operators.add(s)
        elif s_lower in word_operators:
            operators.add(s_lower)
        elif s_lower in keyword_words:
            keywords.add(s_lower)
        elif s_lower in literal_words:
            # Literals keep the casing used in the grammar (e.g. "True").
            literals.add(s)
        elif s_lower in function_words:
            functions.add(s_lower)
        elif s_lower in type_words:
            types.add(s_lower)

    # Find function-like patterns: word "("
    for func in re.findall(r'"(\w+)"\s*"?\("', cleaned):
        if func.lower() not in literal_words:
            functions.add(func.lower())

    # Find type patterns from the format_type rule. The rule body ends at a
    # blank line, at the next definition, or at the end of the grammar, so
    # literals from later rules are not mistaken for types.
    type_match = re.search(
        r"format_type\s*:\s*(.+?)"
        r"(?=\n\s*\n|\n\s*[?!]?[A-Za-z_]\w*(?:\.\d+)?\s*:|\Z)",
        cleaned,
        re.DOTALL,
    )
    if type_match:
        types.update(t.lower() for t in re.findall(r'"(\w+)"', type_match.group(1)))

    return {
        "keywords": sorted(keywords),
        "operators": sorted(operators),
        "functions": sorted(functions),
        "types": sorted(types),
        "literals": sorted(literals),
    }
# """
# DEPRECATED: Utilities for converting Lark grammars to Lezer format.
#
# ⚠️ WARNING: This module is deprecated and will be removed in a future version.
#
# Original purpose:
# - Transform a Lark grammar to a Lezer grammar for CodeMirror 6
# - Extract completion items (keywords, operators, etc.) from a Lark grammar
#
# Deprecation reason:
# - CodeMirror 6 requires a bundler (Webpack, Rollup, etc.)
# - Incompatible with FastHTML's direct script inclusion approach
# - Replaced by CodeMirror 5 Simple Mode (see lark_to_simple_mode.py)
#
# Migration path:
# - Use lark_to_simple_mode.py for CodeMirror 5 syntax highlighting
# - extract_completions_from_grammar() is still used and will be moved to utils.py
# """
#
# import re
# from typing import Dict, List, Set
#
#
# def lark_to_lezer_grammar(lark_grammar: str) -> str:
# """
# Convert a Lark grammar to a Lezer grammar.
#
# This is a simplified converter that handles common Lark patterns.
# Complex grammars may require manual adjustment.
#
# Args:
# lark_grammar: The Lark grammar string.
#
# Returns:
# The Lezer grammar string.
# """
# lines = lark_grammar.strip().split("\n")
# lezer_rules = []
# tokens = []
#
# for line in lines:
# line = line.strip()
#
# # Skip empty lines and comments
# if not line or line.startswith("//") or line.startswith("#"):
# continue
#
# # Skip Lark-specific directives
# if line.startswith("%"):
# continue
#
# # Parse rule definitions (lowercase names only)
# rule_match = re.match(r"^([a-z_][a-z0-9_]*)\s*:\s*(.+)$", line)
# if rule_match:
# name, body = rule_match.groups()
# lezer_rule = _convert_rule(name, body)
# if lezer_rule:
# lezer_rules.append(lezer_rule)
# continue
#
# # Parse terminal definitions (uppercase names)
# terminal_match = re.match(r"^([A-Z_][A-Z0-9_]*)\s*:\s*(.+)$", line)
# if terminal_match:
# name, pattern = terminal_match.groups()
# token = _convert_terminal(name, pattern)
# if token:
# tokens.append(token)
#
# # Build Lezer grammar
# lezer_output = ["@top Start { scope+ }", ""]
#
# # Add rules
# for rule in lezer_rules:
# lezer_output.append(rule)
#
# lezer_output.append("")
# lezer_output.append("@tokens {")
#
# # Add tokens
# for token in tokens:
# lezer_output.append(f" {token}")
#
# # Add common tokens
# lezer_output.extend([
# ' whitespace { $[ \\t]+ }',
# ' newline { $[\\n\\r] }',
# ' Comment { "#" ![$\\n]* }',
# ])
#
# lezer_output.append("}")
# lezer_output.append("")
# lezer_output.append("@skip { whitespace | Comment }")
#
# return "\n".join(lezer_output)
#
#
# def _convert_rule(name: str, body: str) -> str:
# """Convert a single Lark rule to Lezer format."""
# # Skip internal rules (starting with _)
# if name.startswith("_"):
# return ""
#
# # Convert rule name to PascalCase for Lezer
# lezer_name = _to_pascal_case(name)
#
# # Convert body
# lezer_body = _convert_body(body)
#
# if lezer_body:
# return f"{lezer_name} {{ {lezer_body} }}"
# return ""
#
#
# def _convert_terminal(name: str, pattern: str) -> str:
# """Convert a Lark terminal to Lezer token format."""
# pattern = pattern.strip()
#
# # Handle regex patterns
# if pattern.startswith("/") and pattern.endswith("/"):
# regex = pattern[1:-1]
# # Convert to Lezer regex format
# return f'{name} {{ ${regex}$ }}'
#
# # Handle string literals
# if pattern.startswith('"') or pattern.startswith("'"):
# return f'{name} {{ {pattern} }}'
#
# # Handle alternatives (literal strings separated by |)
# if "|" in pattern:
# alternatives = [alt.strip() for alt in pattern.split("|")]
# if all(alt.startswith('"') or alt.startswith("'") for alt in alternatives):
# return f'{name} {{ {" | ".join(alternatives)} }}'
#
# return ""
#
#
# def _convert_body(body: str) -> str:
# """Convert the body of a Lark rule to Lezer format."""
# # Remove inline transformations (-> name)
# body = re.sub(r"\s*->\s*\w+", "", body)
#
# # Convert alternatives
# parts = []
# for alt in body.split("|"):
# alt = alt.strip()
# if alt:
# converted = _convert_sequence(alt)
# if converted:
# parts.append(converted)
#
# return " | ".join(parts)
#
#
# def _convert_sequence(seq: str) -> str:
# """Convert a sequence of items in a rule."""
# items = []
#
# # Tokenize the sequence
# tokens = re.findall(
# r'"[^"]*"|\'[^\']*\'|/[^/]+/|\([^)]+\)|\[[^\]]+\]|[a-zA-Z_][a-zA-Z0-9_]*|\?|\*|\+',
# seq
# )
#
# for token in tokens:
# if token.startswith('"') or token.startswith("'"):
# # String literal
# items.append(token)
# elif token.startswith("("):
# # Group
# inner = token[1:-1]
# items.append(f"({_convert_body(inner)})")
# elif token.startswith("["):
# # Optional group in Lark
# inner = token[1:-1]
# items.append(f"({_convert_body(inner)})?")
# elif token in ("?", "*", "+"):
# # Quantifiers - attach to previous item
# if items:
# items[-1] = items[-1] + token
# elif token.isupper() or token.startswith("_"):
# # Terminal reference
# items.append(token)
# elif token.islower() or "_" in token:
# # Rule reference - convert to PascalCase
# items.append(_to_pascal_case(token))
#
# return " ".join(items)
#
#
# def _to_pascal_case(name: str) -> str:
# """Convert snake_case to PascalCase."""
# return "".join(word.capitalize() for word in name.split("_"))
#
#
# def extract_completions_from_grammar(lark_grammar: str) -> Dict[str, List[str]]:
# """
# Extract completion items from a Lark grammar.
#
# Parses the grammar to find:
# - Keywords (reserved words like if, not, and)
# - Operators (==, !=, contains, etc.)
# - Functions (style, format, etc.)
# - Types (number, date, boolean, etc.)
# - Literals (True, False, etc.)
#
# Args:
# lark_grammar: The Lark grammar string.
#
# Returns:
# Dictionary with completion categories.
# """
# keywords: Set[str] = set()
# operators: Set[str] = set()
# functions: Set[str] = set()
# types: Set[str] = set()
# literals: Set[str] = set()
#
# # Find all quoted strings (potential keywords/operators)
# quoted_strings = re.findall(r'"([^"]+)"', lark_grammar)
#
# # Also look for terminal definitions with string alternatives (e.g., BOOLEAN: "True" | "False")
# terminal_literals = re.findall(r'[A-Z_]+:\s*"([^"]+)"(?:\s*\|\s*"([^"]+)")*', lark_grammar)
# for match in terminal_literals:
# for literal in match:
# if literal:
# quoted_strings.append(literal)
#
# for s in quoted_strings:
# s_lower = s.lower()
#
# # Classify based on pattern
# if s in ("==", "!=", "<=", "<", ">=", ">", "+", "-", "*", "/"):
# operators.add(s)
# elif s_lower in ("contains", "startswith", "endswith", "in", "between", "isempty", "isnotempty"):
# operators.add(s_lower)
# elif s_lower in ("if", "not", "and", "or"):
# keywords.add(s_lower)
# elif s_lower in ("true", "false"):
# literals.add(s)
# elif s_lower in ("style", "format"):
# functions.add(s_lower)
# elif s_lower in ("column", "row", "cell", "value", "col"):
# keywords.add(s_lower)
# elif s_lower in ("number", "date", "boolean", "text", "enum"):
# types.add(s_lower)
# elif s_lower == "case":
# keywords.add(s_lower)
#
# # Find function-like patterns: word "("
# function_patterns = re.findall(r'"(\w+)"\s*"?\("', lark_grammar)
# for func in function_patterns:
# if func.lower() not in ("true", "false"):
# functions.add(func.lower())
#
# # Find type patterns from format_type rule
# type_match = re.search(r'format_type\s*:\s*(.+?)(?:\n\n|\Z)', lark_grammar, re.DOTALL)
# if type_match:
# type_strings = re.findall(r'"(\w+)"', type_match.group(1))
# types.update(t.lower() for t in type_strings)
#
# return {
# "keywords": sorted(keywords),
# "operators": sorted(operators),
# "functions": sorted(functions),
# "types": sorted(types),
# "literals": sorted(literals),
# }

View File

@@ -0,0 +1,240 @@
"""
Utilities for converting Lark grammars to CodeMirror 5 Simple Mode format.
This module provides functions to extract regex patterns from Lark grammar
terminals and generate a CodeMirror Simple Mode configuration for syntax highlighting.
"""
import re
from typing import Dict, List, Any
def lark_to_simple_mode(lark_grammar: str) -> Dict[str, Any]:
    """
    Build a CodeMirror 5 Simple Mode configuration from a Lark grammar.

    The rule list is assembled in this order: a ``#`` comment rule, one
    keyword rule (only when keywords were found), then one rule per known
    terminal, in the order the terminals appear in the grammar. Terminals
    that are not in the mapping table below are ignored.

    Args:
        lark_grammar: The Lark grammar string.

    Returns:
        Dictionary with Simple Mode configuration:
        {
            "start": [
                {"regex": "...", "token": "keyword"},
                {"regex": "...", "token": "string"},
                ...
            ]
        }
    """
    # Terminal name -> CodeMirror token class
    token_for_terminal = {
        "QUOTED_STRING": "string",
        "SIGNED_NUMBER": "number",
        "INTEGER": "number",
        "BOOLEAN": "atom",
        "CELL_ID": "variable-3",
        "NAME": "variable",
    }

    keyword_list = _extract_keywords(lark_grammar)
    terminal_patterns = _extract_terminals(lark_grammar)

    # Comments go first so they take priority over every other rule.
    mode_rules = [{"regex": r"#.*", "token": "comment"}]

    if keyword_list:
        alternation = "|".join(map(re.escape, keyword_list))
        mode_rules.append({
            "regex": r"\b(?:" + alternation + r")\b",
            "token": "keyword",
        })

    for terminal, raw_pattern in terminal_patterns.items():
        token_class = token_for_terminal.get(terminal)
        if token_class is None:
            continue
        converted = _lark_regex_to_js(raw_pattern)
        if converted:
            mode_rules.append({"regex": converted, "token": token_class})

    return {"start": mode_rules}
def _extract_keywords(grammar: str) -> List[str]:
"""
Extract keyword literals from grammar rules.
Looks for quoted string literals in rules (e.g., "column", "if", "style").
Args:
grammar: The Lark grammar string.
Returns:
List of keyword strings.
"""
keywords = set()
# Match quoted literals in rules (not in terminal definitions)
# Pattern: "keyword" but not in lines like: TERMINAL: "pattern"
lines = grammar.split("\n")
for line in lines:
# Skip terminal definitions (uppercase name followed by colon)
if re.match(r'\s*[A-Z_]+\s*:', line):
continue
# Skip comments
if line.strip().startswith("//") or line.strip().startswith("#"):
continue
# Find quoted strings in rules
matches = re.findall(r'"([a-z_]+)"', line)
for match in matches:
# Filter out regex-like patterns, keep only identifiers
if re.match(r'^[a-z_]+$', match):
keywords.add(match)
return sorted(keywords)
def _extract_terminals(grammar: str) -> Dict[str, str]:
"""
Extract terminal definitions from Lark grammar.
Args:
grammar: The Lark grammar string.
Returns:
Dictionary mapping terminal names to their regex patterns.
"""
terminals = {}
lines = grammar.split("\n")
for line in lines:
# Match terminal definitions: NAME: /regex/ or NAME: "literal"
match = re.match(r'\s*([A-Z_]+)\s*:\s*/([^/]+)/', line)
if match:
name, pattern = match.groups()
terminals[name] = pattern
continue
# Match literal alternatives: BOOLEAN: "True" | "False"
match = re.match(r'\s*([A-Z_]+)\s*:\s*(.+)', line)
if match:
name, alternatives = match.groups()
# Extract quoted literals
literals = re.findall(r'"([^"]+)"', alternatives)
if literals:
# Build regex alternation
pattern = "|".join(re.escape(lit) for lit in literals)
terminals[name] = pattern
return terminals
def _lark_regex_to_js(lark_pattern: str) -> str:
"""
Convert a Lark regex pattern to JavaScript regex.
This is a simplified converter that handles common patterns.
Complex patterns may need manual adjustment.
Args:
lark_pattern: Lark regex pattern.
Returns:
JavaScript regex pattern string, or empty string if conversion fails.
"""
# Remove Lark-specific flags
pattern = lark_pattern.strip()
# Handle common patterns
conversions = [
# Escape sequences
(r'\[', r'['),
(r'\]', r']'),
# Character classes are mostly compatible
# Numbers: [0-9]+ or \d+
# Letters: [a-zA-Z]
# Whitespace: [ \t]
]
result = pattern
for lark_pat, js_pat in conversions:
result = result.replace(lark_pat, js_pat)
# Wrap in word boundaries for identifier-like patterns
# Example: [a-zA-Z_][a-zA-Z0-9_]* → \b[a-zA-Z_][a-zA-Z0-9_]*\b
if re.match(r'\[[a-zA-Z_]+\]', result):
result = r'\b' + result + r'\b'
return result
def generate_formatting_dsl_mode() -> Dict[str, Any]:
    """
    Build the hand-written Simple Mode configuration of the Formatting DSL.

    Unlike the auto-generated configuration, the rules here are fixed and
    listed from highest priority (comments) to lowest (plain names).

    Returns:
        Simple Mode configuration dictionary with a single "start" state.
    """
    # (regex, token class) pairs, highest priority first
    rule_table = (
        # Comments
        (r"#.*", "comment"),
        # Scope keywords
        (r"\b(?:column|row|cell)\b", "keyword"),
        # Condition keywords
        (r"\b(?:if|not|and|or|in|between|case)\b", "keyword"),
        # Built-in functions
        (r"\b(?:style|format)\b", "builtin"),
        # Format types
        (r"\b(?:number|date|boolean|text|enum)\b", "builtin"),
        # String operators (word-like)
        (r"\b(?:contains|startswith|endswith|isempty|isnotempty)\b", "operator"),
        # Comparison operators (symbols)
        (r"==|!=|<=|>=|<|>", "operator"),
        # Special references
        (r"\b(?:value|col|row|cell)\b", "variable-2"),
        # Booleans
        (r"\b(?:True|False|true|false)\b", "atom"),
        # Numbers (integers and floats, with optional sign)
        (r"[+-]?\b\d+(?:\.\d+)?\b", "number"),
        # Strings (double or single quoted)
        (r'"(?:[^\\"]|\\.)*"', "string"),
        (r"'(?:[^\\']|\\.)*'", "string"),
        # Cell IDs
        (r"\btcell_[a-zA-Z0-9_-]+\b", "variable-3"),
        # Names (identifiers)
        (r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", "variable"),
    )

    return {
        "start": [{"regex": regex, "token": token} for regex, token in rule_table]
    }

View File

@@ -5,6 +5,9 @@ Provides the Lark grammar and derived completions for the
DataGrid Formatting DSL.
"""
from functools import cached_property
from typing import Dict, Any
from myfasthtml.core.dsl.base import DSLDefinition
from myfasthtml.core.formatting.dsl.grammar import GRAMMAR
@@ -15,9 +18,20 @@ class FormattingDSL(DSLDefinition):
Uses the existing Lark grammar from grammar.py.
"""
name: str = "Formatting DSL"
def get_grammar(self) -> str:
    """
    Provide the Lark grammar text of the formatting DSL.

    Returns:
        The module-level ``GRAMMAR`` constant, unchanged.
    """
    grammar_text: str = GRAMMAR
    return grammar_text
@cached_property
def simple_mode_config(self) -> Dict[str, Any]:
    """
    Hand-tuned Simple Mode configuration for the formatting DSL.

    Overrides the base-class property: the configuration comes from
    ``generate_formatting_dsl_mode()`` rather than from the grammar.
    Because this is a ``cached_property``, it is built on first access and
    reused afterwards.

    Returns:
        The Simple Mode configuration dictionary.
    """
    # The generator is imported when the property is first evaluated.
    from myfasthtml.core.dsl.lark_to_simple_mode import (
        generate_formatting_dsl_mode as build_formatting_mode,
    )

    return build_formatting_mode()