""" Formula Engine — facade orchestrating parsing, DAG, and evaluation. Coordinates: - Parsing formula text via the DSL parser - Registering formulas and their dependencies in the DependencyGraph - Evaluating dirty formula columns row-by-row via FormulaEvaluator - Updating ns_fast_access caches in the DatagridStore """ import logging from typing import Any, Callable, Optional import numpy as np from .dataclasses import FormulaDefinition, WhereClause from .dependency_graph import DependencyGraph from .dsl.parser import get_parser from .dsl.transformer import FormulaTransformer from .evaluator import FormulaEvaluator logger = logging.getLogger("FormulaEngine") # Callback that returns a DatagridStore-like object for a given table name RegistryResolver = Callable[[str], Any] def parse_formula(text: str) -> FormulaDefinition | None: """Parse a formula expression string into a FormulaDefinition AST. Args: text: The formula expression string. Returns: FormulaDefinition on success, None if text is empty. Raises: FormulaSyntaxError: If the formula text is syntactically invalid. """ text = text.strip() if text else "" if not text: return None parser = get_parser() tree = parser.parse(text) if tree is None: return None transformer = FormulaTransformer() formula = transformer.transform(tree) formula.source_text = text return formula class FormulaEngine: """ Facade for the formula calculation system. Orchestrates formula parsing, dependency tracking, and incremental recalculation of formula columns. Args: registry_resolver: Callback that takes a table name and returns the DatagridStore for that table (used for cross-table refs). Provided by DataGridsManager. """ def __init__(self, registry_resolver: Optional[RegistryResolver] = None): self._graph = DependencyGraph() self._registry_resolver = registry_resolver # Cache of parsed formulas: {(table, col): FormulaDefinition} self._formulas: dict[tuple[str, str], FormulaDefinition] = {} def set_formula(self, table: str, col: str, formula_text: str) -> None: """ Parse and register a formula for a column. Args: table: Table name. col: Column name. formula_text: The formula expression string. Raises: FormulaSyntaxError: If the formula is syntactically invalid. FormulaCycleError: If the formula would create a circular dependency. """ formula_text = formula_text.strip() if formula_text else "" if not formula_text: self.remove_formula(table, col) return formula = parse_formula(formula_text) if formula is None: self.remove_formula(table, col) return # Registers in DAG and raises FormulaCycleError if cycle detected self._graph.add_formula(table, col, formula) self._formulas[(table, col)] = formula logger.debug("Formula set for %s.%s: %s", table, col, formula_text) def remove_formula(self, table: str, col: str) -> None: """ Remove a formula column from the engine. Args: table: Table name. col: Column name. """ self._graph.remove_formula(table, col) self._formulas.pop((table, col), None) def mark_data_changed( self, table: str, col: str, rows: Optional[list[int]] = None, ) -> None: """ Mark a column's data as changed, propagating dirty flags. Call this when source data is modified so that dependent formula columns are re-evaluated on next render. Args: table: Table name. col: Column name. rows: Specific row indices that changed. None means all rows. """ self._graph.mark_dirty(table, col, rows) def recalculate_if_needed(self, table: str, store: Any) -> bool: """ Recalculate all dirty formula columns for a table. Should be called at the start of ``mk_body_content_page()`` to ensure formula columns are up-to-date before rendering. Updates ``store.ns_fast_access`` and ``store.ns_row_data`` in place. Args: table: Table name. store: The DatagridStore instance for this table. Returns: True if any columns were recalculated, False otherwise. """ dirty_nodes = self._graph.get_calculation_order(table=table) if not dirty_nodes: return False for node in dirty_nodes: formula = node.formula if formula is None: continue self._evaluate_column(table, node.column, formula, store) self._graph.clear_dirty(node.node_id) # Rebuild ns_row_data after recalculation if dirty_nodes and store.ns_fast_access: self._rebuild_row_data(store) return True def has_formula(self, table: str, col: str) -> bool: """ Check if a column has a formula registered. Args: table: Table name. col: Column name. Returns: True if the column has a registered formula. """ return self._graph.has_formula(table, col) def get_formula_text(self, table: str, col: str) -> Optional[str]: """ Get the source text of a registered formula. Args: table: Table name. col: Column name. Returns: Formula source text or None if not registered. """ formula = self._formulas.get((table, col)) return formula.source_text if formula else None # ==================== Private helpers ==================== def _evaluate_column( self, table: str, col: str, formula: FormulaDefinition, store: Any, ) -> None: """ Evaluate a formula column row-by-row and update ns_fast_access. Args: table: Table name. col: Column name. formula: The parsed FormulaDefinition. store: The DatagridStore with ns_fast_access and ns_row_data. """ if store.ns_row_data is None or len(store.ns_row_data) == 0: return n_rows = len(store.ns_row_data) resolver = self._make_cross_table_resolver(table) evaluator = FormulaEvaluator(cross_table_resolver=resolver) # Ensure ns_fast_access exists before the loop so that formula columns # evaluated earlier in the same pass are visible to subsequent columns. if store.ns_fast_access is None: store.ns_fast_access = {} results = np.empty(n_rows, dtype=object) for row_index in range(n_rows): # Build row_data from ns_fast_access so that formula columns evaluated # earlier in this pass (e.g. B) are available to dependent columns (e.g. C). row_data = { c: arr[row_index] for c, arr in store.ns_fast_access.items() if arr is not None and row_index < len(arr) } results[row_index] = evaluator.evaluate(formula, row_data, row_index) store.ns_fast_access[col] = results logger.debug("Evaluated formula column %s.%s (%d rows)", table, col, n_rows) def _rebuild_row_data(self, store: Any) -> None: """ Rebuild ns_row_data to include formula column results. This ensures formula values are available to dependent formulas in subsequent evaluation passes. Args: store: The DatagridStore to update. """ if store.ns_fast_access is None: return n_rows = len(store.ns_row_data) for row_index in range(n_rows): row = store.ns_row_data[row_index] for col, arr in store.ns_fast_access.items(): if arr is not None and row_index < len(arr): row[col] = arr[row_index] def _make_cross_table_resolver(self, current_table: str): """ Create a cross-table resolver callback for the given table context. Resolution strategy: 1. Explicit WHERE clause: scan remote column for matching rows. 2. Implicit join by ``id`` column: match rows where both tables share the same id value. 3. Fallback: match by row_index. Args: current_table: The table that contains the formula. Returns: A callable ``resolver(table, column, where_clause, row_index) -> value``. """ def resolver( remote_table: str, remote_column: str, where_clause: Optional[WhereClause], row_index: int, ) -> Any: if self._registry_resolver is None: logger.warning( "No registry_resolver set for cross-table ref %s.%s", remote_table, remote_column, ) return None remote_store = self._registry_resolver(remote_table) if remote_store is None: logger.warning("Table '%s' not found in registry", remote_table) return None ns = remote_store.ns_fast_access if not ns or remote_column not in ns: logger.debug( "Column '%s' not found in table '%s'", remote_column, remote_table ) return None remote_array = ns[remote_column] # Strategy 1: Explicit WHERE clause if where_clause is not None: return self._resolve_with_where( where_clause, remote_store, remote_column, remote_array, current_table, row_index, ) # Strategy 2: Implicit join by 'id' column current_store = self._registry_resolver(current_table) if ( current_store is not None and current_store.ns_fast_access is not None and "id" in current_store.ns_fast_access and "id" in ns ): local_id_arr = current_store.ns_fast_access["id"] remote_id_arr = ns["id"] if row_index < len(local_id_arr): local_id = local_id_arr[row_index] # Find first matching row in remote table matches = np.where(remote_id_arr == local_id)[0] if len(matches) > 0: return remote_array[matches[0]] return None # Strategy 3: Fallback — match by row_index if row_index < len(remote_array): return remote_array[row_index] return None return resolver def _resolve_with_where( self, where_clause: WhereClause, remote_store: Any, remote_column: str, remote_array: Any, current_table: str, row_index: int, ) -> Any: """ Resolve a cross-table reference using an explicit WHERE clause. Args: where_clause: The parsed WHERE clause. remote_store: DatagridStore for the remote table. remote_column: Column to return value from. remote_array: numpy array of the remote column values. current_table: Table containing the formula. row_index: Current row being evaluated. Returns: The value from the first matching remote row, or None. """ remote_ns = remote_store.ns_fast_access if not remote_ns: return None # Get the remote key column array remote_key_col = where_clause.remote_column if remote_key_col not in remote_ns: logger.debug( "WHERE key column '%s' not found in remote table", remote_key_col ) return None remote_key_array = remote_ns[remote_key_col] # Get the local value to compare current_store = self._registry_resolver(current_table) if self._registry_resolver else None if current_store is None or current_store.ns_fast_access is None: return None local_col = where_clause.local_column if local_col not in current_store.ns_fast_access: logger.debug("WHERE local column '%s' not found", local_col) return None local_array = current_store.ns_fast_access[local_col] if row_index >= len(local_array): return None local_value = local_array[row_index] # Find matching rows try: matches = np.where(remote_key_array == local_value)[0] except Exception: matches = [] if len(matches) == 0: return None # Return value from first match (use aggregation functions for multi-row) return remote_array[matches[0]]