Introducing column formulas

This commit is contained in:
2026-02-13 21:38:00 +01:00
parent 0df78c0513
commit e8443f07f9
29 changed files with 3889 additions and 15 deletions

View File

@@ -0,0 +1,398 @@
"""
Formula Engine — facade orchestrating parsing, DAG, and evaluation.
Coordinates:
- Parsing formula text via the DSL parser
- Registering formulas and their dependencies in the DependencyGraph
- Evaluating dirty formula columns row-by-row via FormulaEvaluator
- Updating ns_fast_access caches in the DatagridStore
"""
import logging
from typing import Any, Callable, Optional
import numpy as np
from .dataclasses import FormulaDefinition, WhereClause
from .dependency_graph import DependencyGraph
from .dsl.parser import get_parser
from .dsl.transformer import FormulaTransformer
from .evaluator import FormulaEvaluator
# Module-level logger, registered under the fixed name "FormulaEngine"
# (not derived from the module path).
logger = logging.getLogger("FormulaEngine")
# Type alias for the table-lookup callback: it receives a table name and
# returns a DatagridStore-like object for that table. The engine checks the
# result against None and treats None as "table not found".
RegistryResolver = Callable[[str], Any]
def parse_formula(text: str) -> FormulaDefinition | None:
    """Turn a formula expression string into a FormulaDefinition AST.

    The input is stripped of surrounding whitespace first; the stripped
    text is what gets parsed and what is stored on the result as
    ``source_text``.

    Args:
        text: The formula expression string. Falsy values (``None``, ``""``)
            are treated as an empty formula.

    Returns:
        The FormulaDefinition produced by the transformer, or None when the
        text is empty/blank or the parser yields no tree.

    Raises:
        FormulaSyntaxError: If the formula text is syntactically invalid.
    """
    source = (text or "").strip()
    if not source:
        # Nothing to parse: do not even touch the parser.
        return None

    tree = get_parser().parse(source)
    if tree is None:
        return None

    formula = FormulaTransformer().transform(tree)
    formula.source_text = source
    return formula
class FormulaEngine:
    """
    Facade for the formula calculation system.

    Orchestrates formula parsing, dependency tracking, and incremental
    recalculation of formula columns.

    Args:
        registry_resolver: Callback that takes a table name and returns
            the DatagridStore for that table (used for cross-table refs).
            Provided by DataGridsManager.
    """

    def __init__(self, registry_resolver: Optional[RegistryResolver] = None):
        self._graph = DependencyGraph()
        self._registry_resolver = registry_resolver
        # Cache of parsed formulas: {(table, col): FormulaDefinition}
        self._formulas: dict[tuple[str, str], FormulaDefinition] = {}

    def set_formula(self, table: str, col: str, formula_text: str) -> None:
        """
        Parse and register a formula for a column.

        An empty or blank ``formula_text`` (or one that parses to nothing)
        removes any formula currently registered for the column.

        Args:
            table: Table name.
            col: Column name.
            formula_text: The formula expression string.

        Raises:
            FormulaSyntaxError: If the formula is syntactically invalid.
            FormulaCycleError: If the formula would create a circular dependency.
        """
        formula_text = formula_text.strip() if formula_text else ""
        if not formula_text:
            self.remove_formula(table, col)
            return

        formula = parse_formula(formula_text)
        if formula is None:
            self.remove_formula(table, col)
            return

        # Registers in DAG and raises FormulaCycleError if cycle detected.
        # The local cache is only updated once the graph accepted the formula.
        self._graph.add_formula(table, col, formula)
        self._formulas[(table, col)] = formula
        logger.debug("Formula set for %s.%s: %s", table, col, formula_text)

    def remove_formula(self, table: str, col: str) -> None:
        """
        Remove a formula column from the engine.

        Removing a column that has no cached formula is not an error for the
        local cache (the entry is popped with a default).

        Args:
            table: Table name.
            col: Column name.
        """
        self._graph.remove_formula(table, col)
        self._formulas.pop((table, col), None)

    def mark_data_changed(
        self,
        table: str,
        col: str,
        rows: Optional[list[int]] = None,
    ) -> None:
        """
        Mark a column's data as changed, propagating dirty flags.

        Call this when source data is modified so that dependent formula
        columns are re-evaluated on next render.

        Args:
            table: Table name.
            col: Column name.
            rows: Specific row indices that changed. None means all rows.
        """
        self._graph.mark_dirty(table, col, rows)

    def recalculate_if_needed(self, table: str, store: Any) -> bool:
        """
        Recalculate all dirty formula columns for a table.

        Should be called at the start of ``mk_body_content_page()`` to
        ensure formula columns are up-to-date before rendering.
        Updates ``store.ns_fast_access`` and ``store.ns_row_data`` in place.

        Args:
            table: Table name.
            store: The DatagridStore instance for this table.

        Returns:
            False when the graph reports no dirty nodes for the table,
            True otherwise.
        """
        dirty_nodes = self._graph.get_calculation_order(table=table)
        if not dirty_nodes:
            return False

        for node in dirty_nodes:
            formula = node.formula
            if formula is None:
                # Nodes without a formula are skipped and stay dirty.
                continue
            self._evaluate_column(table, node.column, formula, store)
            self._graph.clear_dirty(node.node_id)

        # Propagate the fresh column arrays back into the per-row dicts.
        if store.ns_fast_access:
            self._rebuild_row_data(store)

        # NOTE(review): True is returned even if every dirty node was skipped
        # for lacking a formula; kept as-is because callers may rely on it.
        return True

    def has_formula(self, table: str, col: str) -> bool:
        """
        Check if a column has a formula registered.

        Args:
            table: Table name.
            col: Column name.

        Returns:
            True if the column has a registered formula.
        """
        return self._graph.has_formula(table, col)

    def get_formula_text(self, table: str, col: str) -> Optional[str]:
        """
        Get the source text of a registered formula.

        Args:
            table: Table name.
            col: Column name.

        Returns:
            Formula source text or None if not registered.
        """
        formula = self._formulas.get((table, col))
        return formula.source_text if formula else None

    # ==================== Private helpers ====================

    def _evaluate_column(
        self,
        table: str,
        col: str,
        formula: FormulaDefinition,
        store: Any,
    ) -> None:
        """
        Evaluate a formula column row-by-row and update ns_fast_access.

        Does nothing when the store has no row data. Otherwise the result
        array (dtype ``object``, one entry per row of ``ns_row_data``) is
        stored under ``store.ns_fast_access[col]``.

        Args:
            table: Table name.
            col: Column name.
            formula: The parsed FormulaDefinition.
            store: The DatagridStore with ns_fast_access and ns_row_data.
        """
        # len() rather than truthiness: ns_row_data may be an array-like.
        if store.ns_row_data is None or len(store.ns_row_data) == 0:
            return

        n_rows = len(store.ns_row_data)
        resolver = self._make_cross_table_resolver(table)
        evaluator = FormulaEvaluator(cross_table_resolver=resolver)

        # Ensure ns_fast_access exists before the loop so that formula columns
        # evaluated earlier in the same pass are visible to subsequent columns.
        if store.ns_fast_access is None:
            store.ns_fast_access = {}

        # The set of usable columns and their lengths does not change while
        # this column is evaluated, so compute it once instead of per row.
        columns = [
            (name, arr, len(arr))
            for name, arr in store.ns_fast_access.items()
            if arr is not None
        ]

        results = np.empty(n_rows, dtype=object)
        for row_index in range(n_rows):
            # Build row_data from ns_fast_access so that formula columns evaluated
            # earlier in this pass (e.g. B) are available to dependent columns (e.g. C).
            row_data = {
                name: arr[row_index]
                for name, arr, length in columns
                if row_index < length
            }
            results[row_index] = evaluator.evaluate(formula, row_data, row_index)

        store.ns_fast_access[col] = results
        logger.debug("Evaluated formula column %s.%s (%d rows)", table, col, n_rows)

    def _rebuild_row_data(self, store: Any) -> None:
        """
        Rebuild ns_row_data to include formula column results.

        Copies, for every row, the value of each ``ns_fast_access`` column
        into the matching row mapping. Columns that are None or shorter than
        the row index are skipped for that row.

        Args:
            store: The DatagridStore to update.
        """
        # A store may have column arrays but no row data yet; there is then
        # nothing to rebuild (previously this raised TypeError on len(None)).
        if store.ns_fast_access is None or store.ns_row_data is None:
            return

        columns = [
            (name, arr, len(arr))
            for name, arr in store.ns_fast_access.items()
            if arr is not None
        ]
        for row_index, row in enumerate(store.ns_row_data):
            for name, arr, length in columns:
                if row_index < length:
                    row[name] = arr[row_index]

    @staticmethod
    def _lookup_first_match(key_array: Any, key_value: Any, value_array: Any) -> Any:
        """
        Return the value at the first position where a key column matches.

        Args:
            key_array: Array of key values to search.
            key_value: The key to look for.
            value_array: Array from which the result is taken.

        Returns:
            ``value_array[i]`` for the first ``i`` with
            ``key_array[i] == key_value``; None if there is no match, the
            comparison fails, or ``i`` is out of range for ``value_array``.
        """
        try:
            matches = np.where(key_array == key_value)[0]
        except Exception:
            # Best-effort lookup: an uncomparable key means "no match",
            # but leave a trace so the failure is diagnosable.
            logger.debug("Key comparison failed during cross-table lookup", exc_info=True)
            return None

        if len(matches) == 0:
            return None

        first = int(matches[0])
        if first >= len(value_array):
            return None
        return value_array[first]

    def _make_cross_table_resolver(self, current_table: str):
        """
        Create a cross-table resolver callback for the given table context.

        Resolution strategy:
            1. Explicit WHERE clause: scan remote column for matching rows.
            2. Implicit join by ``id`` column: match rows where both tables share
               the same id value. If both tables have an ``id`` column and the
               row is in range, a missing match yields None (no fallback).
            3. Fallback: match by row_index.

        Args:
            current_table: The table that contains the formula.

        Returns:
            A callable ``resolver(table, column, where_clause, row_index) -> value``.
        """

        def resolver(
            remote_table: str,
            remote_column: str,
            where_clause: Optional[WhereClause],
            row_index: int,
        ) -> Any:
            if self._registry_resolver is None:
                logger.warning(
                    "No registry_resolver set for cross-table ref %s.%s",
                    remote_table, remote_column,
                )
                return None

            remote_store = self._registry_resolver(remote_table)
            if remote_store is None:
                logger.warning("Table '%s' not found in registry", remote_table)
                return None

            ns = remote_store.ns_fast_access
            if not ns or remote_column not in ns:
                logger.debug(
                    "Column '%s' not found in table '%s'", remote_column, remote_table
                )
                return None
            remote_array = ns[remote_column]

            # Strategy 1: Explicit WHERE clause
            if where_clause is not None:
                return self._resolve_with_where(
                    where_clause, remote_store, remote_column,
                    remote_array, current_table, row_index,
                )

            # Strategy 2: Implicit join by 'id' column
            current_store = self._registry_resolver(current_table)
            if (
                current_store is not None
                and current_store.ns_fast_access is not None
                and "id" in current_store.ns_fast_access
                and "id" in ns
            ):
                local_id_arr = current_store.ns_fast_access["id"]
                if row_index < len(local_id_arr):
                    # First remote row with the same id, or None.
                    return self._lookup_first_match(
                        ns["id"], local_id_arr[row_index], remote_array
                    )

            # Strategy 3: Fallback — match by row_index
            if row_index < len(remote_array):
                return remote_array[row_index]
            return None

        return resolver

    def _resolve_with_where(
        self,
        where_clause: WhereClause,
        remote_store: Any,
        remote_column: str,
        remote_array: Any,
        current_table: str,
        row_index: int,
    ) -> Any:
        """
        Resolve a cross-table reference using an explicit WHERE clause.

        Args:
            where_clause: The parsed WHERE clause.
            remote_store: DatagridStore for the remote table.
            remote_column: Column to return value from.
            remote_array: numpy array of the remote column values.
            current_table: Table containing the formula.
            row_index: Current row being evaluated.

        Returns:
            The value from the first matching remote row, or None.
        """
        remote_ns = remote_store.ns_fast_access
        if not remote_ns:
            return None

        # Get the remote key column array
        remote_key_col = where_clause.remote_column
        if remote_key_col not in remote_ns:
            logger.debug(
                "WHERE key column '%s' not found in remote table", remote_key_col
            )
            return None
        remote_key_array = remote_ns[remote_key_col]

        # Get the local value to compare
        current_store = self._registry_resolver(current_table) if self._registry_resolver else None
        if current_store is None or current_store.ns_fast_access is None:
            return None

        local_col = where_clause.local_column
        if local_col not in current_store.ns_fast_access:
            logger.debug("WHERE local column '%s' not found", local_col)
            return None

        local_array = current_store.ns_fast_access[local_col]
        if row_index >= len(local_array):
            return None

        # Return value from first match (use aggregation functions for multi-row)
        return self._lookup_first_match(
            remote_key_array, local_array[row_index], remote_array
        )