Introducing column formulas

2026-02-13 21:38:00 +01:00
parent 0df78c0513
commit e8443f07f9
29 changed files with 3889 additions and 15 deletions
--- a/src/myfasthtml/core/formula/engine.py
+++ b/src/myfasthtml/core/formula/engine.py
@@ -0,0 +1,398 @@
+"""
+Formula Engine — facade orchestrating parsing, DAG, and evaluation.
+
+Coordinates:
+- Parsing formula text via the DSL parser
+- Registering formulas and their dependencies in the DependencyGraph
+- Evaluating dirty formula columns row-by-row via FormulaEvaluator
+- Updating ns_fast_access caches in the DatagridStore
+"""
+import logging
+from typing import Any, Callable, Optional
+
+import numpy as np
+
+from .dataclasses import FormulaDefinition, WhereClause
+from .dependency_graph import DependencyGraph
+from .dsl.parser import get_parser
+from .dsl.transformer import FormulaTransformer
+from .evaluator import FormulaEvaluator
+
+# Module-level logger shared by the helpers and the engine in this file.
+# It is registered under the fixed name "FormulaEngine" (not __name__).
+logger = logging.getLogger("FormulaEngine")
+
+# Callback that returns a DatagridStore-like object for a given table name.
+# The engine treats a None return value as "table not found".
+RegistryResolver = Callable[[str], Any]
+
+
+def parse_formula(text: str) -> FormulaDefinition | None:
+  """Parse a formula expression string into a FormulaDefinition AST.
+
+  Args:
+      text: The formula expression string.
+
+  Returns:
+      FormulaDefinition on success, None if text is empty.
+
+  Raises:
+      FormulaSyntaxError: If the formula text is syntactically invalid.
+  """
+  text = text.strip() if text else ""
+  if not text:
+    return None
+  
+  parser = get_parser()
+  tree = parser.parse(text)
+  if tree is None:
+    return None
+  
+  transformer = FormulaTransformer()
+  formula = transformer.transform(tree)
+  formula.source_text = text
+  return formula
+
+
+class FormulaEngine:
+  """
+  Facade for the formula calculation system.
+
+  Orchestrates formula parsing, dependency tracking, and incremental
+  recalculation of formula columns.
+
+  Args:
+      registry_resolver: Callback that takes a table name and returns
+          the DatagridStore for that table (used for cross-table refs).
+          Provided by DataGridsManager.
+  """
+  
+  def __init__(self, registry_resolver: Optional[RegistryResolver] = None):
+    self._graph = DependencyGraph()
+    self._registry_resolver = registry_resolver
+    # Cache of parsed formulas: {(table, col): FormulaDefinition}
+    self._formulas: dict[tuple[str, str], FormulaDefinition] = {}
+  
+  def set_formula(self, table: str, col: str, formula_text: str) -> None:
+    """
+    Parse and register a formula for a column.
+
+    Args:
+        table: Table name.
+        col: Column name.
+        formula_text: The formula expression string.
+
+    Raises:
+        FormulaSyntaxError: If the formula is syntactically invalid.
+        FormulaCycleError: If the formula would create a circular dependency.
+    """
+    formula_text = formula_text.strip() if formula_text else ""
+    if not formula_text:
+      self.remove_formula(table, col)
+      return
+    
+    formula = parse_formula(formula_text)
+    if formula is None:
+      self.remove_formula(table, col)
+      return
+    
+    # Registers in DAG and raises FormulaCycleError if cycle detected
+    self._graph.add_formula(table, col, formula)
+    self._formulas[(table, col)] = formula
+    
+    logger.debug("Formula set for %s.%s: %s", table, col, formula_text)
+  
+  def remove_formula(self, table: str, col: str) -> None:
+    """
+    Remove a formula column from the engine.
+
+    Args:
+        table: Table name.
+        col: Column name.
+    """
+    self._graph.remove_formula(table, col)
+    self._formulas.pop((table, col), None)
+  
+  def mark_data_changed(
+      self,
+      table: str,
+      col: str,
+      rows: Optional[list[int]] = None,
+  ) -> None:
+    """
+    Mark a column's data as changed, propagating dirty flags.
+
+    Call this when source data is modified so that dependent formula
+    columns are re-evaluated on next render.
+
+    Args:
+        table: Table name.
+        col: Column name.
+        rows: Specific row indices that changed. None means all rows.
+    """
+    self._graph.mark_dirty(table, col, rows)
+  
+  def recalculate_if_needed(self, table: str, store: Any) -> bool:
+    """
+    Recalculate all dirty formula columns for a table.
+
+    Should be called at the start of ``mk_body_content_page()`` to
+    ensure formula columns are up-to-date before rendering.
+
+    Updates ``store.ns_fast_access`` and ``store.ns_row_data`` in place.
+
+    Args:
+        table: Table name.
+        store: The DatagridStore instance for this table.
+
+    Returns:
+        True if any columns were recalculated, False otherwise.
+    """
+    dirty_nodes = self._graph.get_calculation_order(table=table)
+    
+    if not dirty_nodes:
+      return False
+    
+    for node in dirty_nodes:
+      formula = node.formula
+      if formula is None:
+        continue
+      self._evaluate_column(table, node.column, formula, store)
+      self._graph.clear_dirty(node.node_id)
+    
+    # Rebuild ns_row_data after recalculation
+    if dirty_nodes and store.ns_fast_access:
+      self._rebuild_row_data(store)
+    
+    return True
+  
+  def has_formula(self, table: str, col: str) -> bool:
+    """
+    Check if a column has a formula registered.
+
+    Args:
+        table: Table name.
+        col: Column name.
+
+    Returns:
+        True if the column has a registered formula.
+    """
+    return self._graph.has_formula(table, col)
+  
+  def get_formula_text(self, table: str, col: str) -> Optional[str]:
+    """
+    Get the source text of a registered formula.
+
+    Args:
+        table: Table name.
+        col: Column name.
+
+    Returns:
+        Formula source text or None if not registered.
+    """
+    formula = self._formulas.get((table, col))
+    return formula.source_text if formula else None
+  
+  # ==================== Private helpers ====================
+  
+  def _evaluate_column(
+      self,
+      table: str,
+      col: str,
+      formula: FormulaDefinition,
+      store: Any,
+  ) -> None:
+    """
+    Evaluate a formula column row-by-row and update ns_fast_access.
+
+    Args:
+        table: Table name.
+        col: Column name.
+        formula: The parsed FormulaDefinition.
+        store: The DatagridStore with ns_fast_access and ns_row_data.
+    """
+    if store.ns_row_data is None or len(store.ns_row_data) == 0:
+      return
+    
+    n_rows = len(store.ns_row_data)
+    resolver = self._make_cross_table_resolver(table)
+    evaluator = FormulaEvaluator(cross_table_resolver=resolver)
+    
+    # Ensure ns_fast_access exists before the loop so that formula columns
+    # evaluated earlier in the same pass are visible to subsequent columns.
+    if store.ns_fast_access is None:
+      store.ns_fast_access = {}
+    
+    results = np.empty(n_rows, dtype=object)
+    
+    for row_index in range(n_rows):
+      # Build row_data from ns_fast_access so that formula columns evaluated
+      # earlier in this pass (e.g. B) are available to dependent columns (e.g. C).
+      row_data = {
+          c: arr[row_index]
+          for c, arr in store.ns_fast_access.items()
+          if arr is not None and row_index < len(arr)
+      }
+      results[row_index] = evaluator.evaluate(formula, row_data, row_index)
+    
+    store.ns_fast_access[col] = results
+    
+    logger.debug("Evaluated formula column %s.%s (%d rows)", table, col, n_rows)
+  
+  def _rebuild_row_data(self, store: Any) -> None:
+    """
+    Rebuild ns_row_data to include formula column results.
+
+    This ensures formula values are available to dependent formulas
+    in subsequent evaluation passes.
+
+    Args:
+        store: The DatagridStore to update.
+    """
+    if store.ns_fast_access is None:
+      return
+    
+    n_rows = len(store.ns_row_data)
+    for row_index in range(n_rows):
+      row = store.ns_row_data[row_index]
+      for col, arr in store.ns_fast_access.items():
+        if arr is not None and row_index < len(arr):
+          row[col] = arr[row_index]
+  
+  def _make_cross_table_resolver(self, current_table: str):
+    """
+    Create a cross-table resolver callback for the given table context.
+
+    Resolution strategy:
+    1. Explicit WHERE clause: scan remote column for matching rows.
+    2. Implicit join by ``id`` column: match rows where both tables share
+       the same id value.
+    3. Fallback: match by row_index.
+
+    Args:
+        current_table: The table that contains the formula.
+
+    Returns:
+        A callable ``resolver(table, column, where_clause, row_index) -> value``.
+    """
+    
+    def resolver(
+        remote_table: str,
+        remote_column: str,
+        where_clause: Optional[WhereClause],
+        row_index: int,
+    ) -> Any:
+      if self._registry_resolver is None:
+        logger.warning(
+          "No registry_resolver set for cross-table ref %s.%s",
+          remote_table, remote_column,
+        )
+        return None
+      
+      remote_store = self._registry_resolver(remote_table)
+      if remote_store is None:
+        logger.warning("Table '%s' not found in registry", remote_table)
+        return None
+      
+      ns = remote_store.ns_fast_access
+      if not ns or remote_column not in ns:
+        logger.debug(
+          "Column '%s' not found in table '%s'", remote_column, remote_table
+        )
+        return None
+      
+      remote_array = ns[remote_column]
+      
+      # Strategy 1: Explicit WHERE clause
+      if where_clause is not None:
+        return self._resolve_with_where(
+          where_clause, remote_store, remote_column,
+          remote_array, current_table, row_index,
+        )
+      
+      # Strategy 2: Implicit join by 'id' column
+      current_store = self._registry_resolver(current_table)
+      if (
+          current_store is not None
+          and current_store.ns_fast_access is not None
+          and "id" in current_store.ns_fast_access
+          and "id" in ns
+      ):
+        local_id_arr = current_store.ns_fast_access["id"]
+        remote_id_arr = ns["id"]
+        if row_index < len(local_id_arr):
+          local_id = local_id_arr[row_index]
+          # Find first matching row in remote table
+          matches = np.where(remote_id_arr == local_id)[0]
+          if len(matches) > 0:
+            return remote_array[matches[0]]
+        return None
+      
+      # Strategy 3: Fallback — match by row_index
+      if row_index < len(remote_array):
+        return remote_array[row_index]
+      return None
+    
+    return resolver
+  
+  def _resolve_with_where(
+      self,
+      where_clause: WhereClause,
+      remote_store: Any,
+      remote_column: str,
+      remote_array: Any,
+      current_table: str,
+      row_index: int,
+  ) -> Any:
+    """
+    Resolve a cross-table reference using an explicit WHERE clause.
+
+    Args:
+        where_clause: The parsed WHERE clause.
+        remote_store: DatagridStore for the remote table.
+        remote_column: Column to return value from.
+        remote_array: numpy array of the remote column values.
+        current_table: Table containing the formula.
+        row_index: Current row being evaluated.
+
+    Returns:
+        The value from the first matching remote row, or None.
+    """
+    remote_ns = remote_store.ns_fast_access
+    if not remote_ns:
+      return None
+    
+    # Get the remote key column array
+    remote_key_col = where_clause.remote_column
+    if remote_key_col not in remote_ns:
+      logger.debug(
+        "WHERE key column '%s' not found in remote table", remote_key_col
+      )
+      return None
+    
+    remote_key_array = remote_ns[remote_key_col]
+    
+    # Get the local value to compare
+    current_store = self._registry_resolver(current_table) if self._registry_resolver else None
+    if current_store is None or current_store.ns_fast_access is None:
+      return None
+    
+    local_col = where_clause.local_column
+    if local_col not in current_store.ns_fast_access:
+      logger.debug("WHERE local column '%s' not found", local_col)
+      return None
+    
+    local_array = current_store.ns_fast_access[local_col]
+    if row_index >= len(local_array):
+      return None
+    
+    local_value = local_array[row_index]
+    
+    # Find matching rows
+    try:
+      matches = np.where(remote_key_array == local_value)[0]
+    except Exception:
+      matches = []
+    
+    if len(matches) == 0:
+      return None
+    
+    # Return value from first match (use aggregation functions for multi-row)
+    return remote_array[matches[0]]