From 0a766581eda5334490322b42a10be08c2047cbd4 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Fri, 27 Feb 2026 21:10:11 +0100 Subject: [PATCH] Added DataServicesManager and DataService --- docs/DataGrid Refactoring.md | 116 ++++++ src/myfasthtml/core/data/ColumnDefinition.py | 37 ++ src/myfasthtml/core/data/DataService.py | 380 ++++++++++++++++++ .../core/data/DataServicesManager.py | 121 ++++++ src/myfasthtml/core/data/__init__.py | 0 .../formatting/dsl/completion/provider.py | 242 ++++++++--- src/myfasthtml/core/formula/dsl/grammar.py | 2 +- .../core/formula/dsl/transformer.py | 10 +- src/myfasthtml/core/formula/engine.py | 5 +- tests/controls/test_datagridmanager.py | 5 +- tests/core/data/__init__.py | 0 tests/core/data/conftest.py | 44 ++ tests/core/data/test_dataservice.py | 213 ++++++++++ .../test_dataservice_formula_integration.py | 210 ++++++++++ tests/core/data/test_dataservicesmanager.py | 121 ++++++ tests/core/test_datagrid_registry.py | 85 ++-- 16 files changed, 1465 insertions(+), 126 deletions(-) create mode 100644 docs/DataGrid Refactoring.md create mode 100644 src/myfasthtml/core/data/ColumnDefinition.py create mode 100644 src/myfasthtml/core/data/DataService.py create mode 100644 src/myfasthtml/core/data/DataServicesManager.py create mode 100644 src/myfasthtml/core/data/__init__.py create mode 100644 tests/core/data/__init__.py create mode 100644 tests/core/data/conftest.py create mode 100644 tests/core/data/test_dataservice.py create mode 100644 tests/core/data/test_dataservice_formula_integration.py create mode 100644 tests/core/data/test_dataservicesmanager.py diff --git a/docs/DataGrid Refactoring.md b/docs/DataGrid Refactoring.md new file mode 100644 index 0000000..15514f9 --- /dev/null +++ b/docs/DataGrid Refactoring.md @@ -0,0 +1,116 @@ +# DataGrid Refactoring + +## Objective + +Clearly separate data management and rendering responsibilities in the DataGrid system. 
+The current architecture mixes data mutation, formula computation, and rendering in the +same `DataGrid` class, which complicates cross-table formula management and code reasoning. + +## Guiding Principles + +- `DataService` can exist without rendering. The reverse is not true. +- All data mutations go through `DataService`. +- Columns have two facets: data semantics (`ColumnDefinition`) and UI presentation (`ColumnUiState`). +- No more parent hierarchy where avoidable — access via `InstancesManager.get_by_type()`. +- The persistence key is `grid_id` (stable), not `table_name` (can change over time). + +--- + +## New Classes (`core/data/`) + +### `DataServicesManager` — SingleInstance + +- Owns the `FormulaEngine` (cross-table formula coordination) +- Creates `DataService` instances on demand from `DataGridsManager` +- Provides access to `DataService` instances by `grid_id` +- Provides the resolver callback for `FormulaEngine`: `table_name → DataStore` + +### `DataService` — companion to `DataGrid` + +- Owns `DataStore` and `list[ColumnDefinition]` +- Holds a reference to `DataServicesManager` for `FormulaEngine` access +- Methods: `load_dataframe(df)`, `add_row()`, `add_column()`, `set_data(col_id, row_index, value)` +- Mutations call `mark_data_changed()` → set dirty flag +- `ensure_ready()` → recalculates formulas if dirty (called by `mk_body_content_page()`) +- Can exist without any rendering + +### `DataStore` — renamed from `DatagridStore` + +- Pure persistence: `ne_df`, `ns_fast_access`, `ns_row_data`, `ns_total_rows` +- `DbObject` with no business logic + +### `ColumnDefinition` + +- Data semantics: `col_id`, `title`, `type`, `formula`, `col_index` + +--- + +## Modified Classes + +### `DataGridsRegistry` — streamlined + +- Persistence only: `put()`, `remove()`, `get_all_entries()` +- **Loses**: `get_columns()`, `get_column_type()`, `get_column_values()`, `get_row_count()` + +### `DatagridMetadataProvider` — becomes a concrete SingleInstance + +- No longer 
abstract / interface (only one concrete implementation exists) +- Reads from `DataServicesManager` and `DataGridsRegistry` +- Holds: `style_presets`, `formatter_presets`, `all_tables_formats` +- Exposes: `list_tables()`, `list_columns()`, `list_column_values()`, `get_column_type()`, + `list_style_presets()`, `list_format_presets()` + +### `DataGridsManager` — pure UI + +- **Keeps**: `TreeView`, `TabsManager`, document state, `Commands` +- **Loses**: `FormulaEngine`, presets, `DatagridMetadataProvider`, `_resolve_store_for_table()` + +### `DataGrid` — pure rendering + +- **Keeps**: `mk_*`, `render()`, `__ft__()`, `_state`, `_settings` +- **Keeps**: `_apply_sort()`, `_apply_filter()`, `_get_filtered_df()` +- **Loses**: `add_new_row()`, `add_new_column()`, `init_from_dataframe()`, + `_recalculate_formulas()`, `_register_existing_formulas()`, `_df_store` +- Accesses its `DataService` via its `grid_id`: + `InstancesManager.get_by_type(session, DataServicesManager).get_service(grid_id)` +- `mk_body_content_page()` calls `data_service.ensure_ready()` before rendering + +### `DatagridState` + +- `columns` changes from `list[DataGridColumnState]` → `list[ColumnUiState]` +- Everything else remains unchanged + +### `DataGridColumnState` — split into two classes + +| Class | Belongs to | Fields | +|---|---|---| +| `ColumnDefinition` | `DataService` | `col_id`, `title`, `type`, `formula`, `col_index` | +| `ColumnUiState` | `DatagridState` | `col_id`, `width`, `visible`, `format` | + +--- + +## Structural Fix + +**Current bug**: `mark_data_changed()` is defined in `FormulaEngine` but is never called +by `DataGrid`. Formulas are only recalculated defensively at render time. 
+ +**After refactoring**: +- Every mutation in `DataService` calls `mark_data_changed()` → dirty flag set +- `mk_body_content_page()` calls `data_service.ensure_ready()` → recalculates if dirty +- Multiple mutations before a render = a single recalculation + +--- + +## Progress Tracking + +- [x] Create `DataStore` (rename `DatagridStore`) +- [x] Create `ColumnDefinition` +- [x] Create `DataService` +- [x] Create `DataServicesManager` +- [x] Refactor `DataGridsRegistry` (streamline) +- [x] Refactor `DatagridMetadataProvider` (make concrete) +- [x] Refactor `DataGridsManager` (pure UI) +- [x] Refactor `DataGrid` (pure rendering, split `DataGridColumnState`) +- [x] Update tests +- [ ] Remove `init_from_dataframe` from `DataGrid` (kept temporarily for transition) +- [ ] Full split of `DataGridColumnState` into `ColumnDefinition` + `ColumnUiState` in `DatagridState` diff --git a/src/myfasthtml/core/data/ColumnDefinition.py b/src/myfasthtml/core/data/ColumnDefinition.py new file mode 100644 index 0000000..ed86cee --- /dev/null +++ b/src/myfasthtml/core/data/ColumnDefinition.py @@ -0,0 +1,37 @@ +from dataclasses import dataclass, field + +from myfasthtml.core.constants import ColumnType + + +@dataclass +class ColumnDefinition: + """Data semantics of a DataGrid column. + + Holds the structural and computational properties of a column. + Does not contain any UI-related attributes (width, visibility, formatting). + Those are stored in ColumnUiState within DatagridState. + + Attributes: + col_id: Unique identifier for the column. Cannot be changed after creation. + col_index: Index of the column in the DataFrame. -1 for virtual columns + (Formula, RowIndex). + title: Display title of the column. + type: Column data type, determines rendering and mutation behaviour. + formula: DSL expression for ColumnType.Formula columns. Empty string otherwise. 
+ """ + + col_id: str + col_index: int + title: str = None + type: ColumnType = ColumnType.Text + formula: str = "" + + def copy(self) -> "ColumnDefinition": + """Return a shallow copy of this definition.""" + return ColumnDefinition( + col_id=self.col_id, + col_index=self.col_index, + title=self.title, + type=self.type, + formula=self.formula, + ) diff --git a/src/myfasthtml/core/data/DataService.py b/src/myfasthtml/core/data/DataService.py new file mode 100644 index 0000000..27c4562 --- /dev/null +++ b/src/myfasthtml/core/data/DataService.py @@ -0,0 +1,380 @@ +import logging +from typing import Optional + +import numpy as np +import pandas as pd + +from myfasthtml.core.constants import ColumnType, ROW_INDEX_ID +from myfasthtml.core.data.ColumnDefinition import ColumnDefinition +from myfasthtml.core.dbmanager import DbObject +from myfasthtml.core.instances import MultipleInstance +from myfasthtml.core.utils import make_safe_id, make_unique_safe_id + +logger = logging.getLogger(__name__) + +_COLUMN_TYPE_DEFAULTS = { + ColumnType.Number: 0, + ColumnType.Text: "", + ColumnType.Bool: False, + ColumnType.Datetime: pd.NaT, +} + + +class DataStore(DbObject): + """Persistent storage for a DataGrid's tabular data. + + Holds the DataFrame and its derived caches used for rendering and formula + evaluation. Contains no business logic — all mutations are performed by + DataService. + + Attributes: + ne_df: The pandas DataFrame. Source of truth for non-formula columns. + ns_fast_access: Dict mapping col_id to a numpy array. O(1) column + lookup used by FormulaEngine and rendering. + ns_row_data: List of row dicts built from ns_fast_access. Used by + FormattingEngine for rule evaluation. + ns_total_rows: Cached total row count after filtering. 
+ """ + + def __init__(self, owner, save_state: bool = True): + with self.initializing(): + super().__init__(owner, name=f"{owner.get_id()}#store", save_state=save_state) + self.ne_df = None + self.ns_fast_access = None + self.ns_row_data = None + self.ns_total_rows = None + + +class DataServiceState(DbObject): + """Persistent state for DataService. + + Stores the column definitions and the table name associated with the + DataGrid. Persists across sessions via DbObject. + + Attributes: + columns: Ordered list of column data definitions. + table_name: Fully qualified table name used by FormulaEngine + (format: "namespace.name" or "name"). + """ + + def __init__(self, owner, save_state: bool = True): + with self.initializing(): + super().__init__(owner, name="#state", save_state=save_state) + self.columns: list[ColumnDefinition] = [] + self.table_name: str = "" + + +class DataService(MultipleInstance): + """Data companion to DataGrid. + + Owns the DataStore and the list of ColumnDefinition objects for one + DataGrid. All data mutations go through this class. Holds a reference to + DataServicesManager to access the shared FormulaEngine. + + This class can exist and operate independently of any rendering component. + + Attributes: + _state: Persistent state (columns, table_name). + _store: Persistent storage (DataFrame, caches). + """ + + def __init__(self, parent, _id: Optional[str] = None, save_state: bool = True): + super().__init__(parent, _id=_id) + self._state = DataServiceState(self, save_state=save_state) + self._store = DataStore(self, save_state=save_state) + + @property + def columns(self) -> list[ColumnDefinition]: + """Return the list of column definitions.""" + return self._state.columns + + @property + def table_name(self) -> str: + """Return the fully qualified table name used by FormulaEngine.""" + return self._state.table_name + + def set_table_name(self, table_name: str) -> None: + """Update the table name (e.g. 
after a rename).""" + self._state.table_name = table_name + + def get_store(self) -> DataStore: + """Return the underlying DataStore.""" + return self._store + + def get_formula_engine(self): + """Return the shared FormulaEngine from DataServicesManager.""" + return self._parent.get_formula_engine() + + # ------------------------------------------------------------------ + # Data initialisation + # ------------------------------------------------------------------ + + def load_dataframe(self, df: pd.DataFrame, init_columns: bool = True) -> None: + """Load a DataFrame into the store and initialise caches. + + Args: + df: Source DataFrame. Column names are normalised to safe IDs. + init_columns: When True, build ColumnDefinition list from the + DataFrame columns and register any existing formula columns + with the FormulaEngine. + """ + if df is None: + return + + df.columns = df.columns.map(make_safe_id) + self._store.ne_df = df + + if init_columns: + self._state.columns = self._build_column_definitions(df) + self._state.save() + + self._store.ns_fast_access = self._build_fast_access(df) + self._store.ns_row_data = df.to_dict(orient="records") + self._store.ns_total_rows = len(df) + self._store.save() + + self._register_existing_formulas() + + # ------------------------------------------------------------------ + # Mutations + # ------------------------------------------------------------------ + + def add_column(self, col_def: ColumnDefinition) -> None: + """Add a new column to the DataGrid data layer. + + Assigns a unique safe col_id from the title. For Formula and RowIndex + columns, no DataFrame column is created. For all other types, a column + with a type-appropriate default value is added to the DataFrame. + + Args: + col_def: Column definition. col_id will be set by this method. 
+ """ + col_def.col_id = make_unique_safe_id( + col_def.title, [c.col_id for c in self._state.columns] + ) + + if col_def.type == ColumnType.Formula: + col_def.col_index = -1 + self._state.columns.append(col_def) + self._state.save() + return + + if col_def.type == ColumnType.RowIndex: + col_def.col_index = -1 + self._state.columns.append(col_def) + if self._store.ne_df is not None: + self._store.ns_fast_access[col_def.col_id] = ( + self._store.ne_df.index.to_numpy() + ) + self._state.save() + self._store.save() + return + + default_value = _COLUMN_TYPE_DEFAULTS.get(col_def.type, "") + col_def.col_index = ( + len(self._store.ne_df.columns) if self._store.ne_df is not None else 0 + ) + self._state.columns.append(col_def) + + if self._store.ne_df is not None: + self._store.ne_df[col_def.col_id] = default_value + self._store.ns_fast_access[col_def.col_id] = ( + self._store.ne_df[col_def.col_id].to_numpy() + ) + for row_dict in self._store.ns_row_data: + row_dict[col_def.col_id] = default_value + + self._state.save() + self._store.save() + self._mark_changed(col_def.col_id) + + def add_row(self, row_data: Optional[dict] = None) -> None: + """Append a new row with incremental cache updates. + + Creates default values for all non-virtual columns when row_data is + not provided. Marks formula columns dirty so ensure_ready() will + recalculate them on the next render. + + Args: + row_data: Optional dict of {col_id: value}. Defaults to + type-appropriate values for each column. 
+ """ + if self._store.ne_df is None: + return + + new_index = len(self._store.ne_df) + + if row_data is None: + row_data = {} + for col in self._state.columns: + if col.type not in (ColumnType.Formula, ColumnType.RowSelection_): + value = ( + new_index + if col.type == ColumnType.RowIndex + else _COLUMN_TYPE_DEFAULTS.get(col.type, "") + ) + row_data[col.col_id] = value + + self._store.ne_df.loc[new_index] = row_data + + for col_id, value in row_data.items(): + if col_id in self._store.ns_fast_access: + self._store.ns_fast_access[col_id] = np.append( + self._store.ns_fast_access[col_id], value + ) + else: + self._store.ns_fast_access[col_id] = np.array([value]) + + self._store.ns_row_data.append(row_data.copy()) + self._store.ns_total_rows = len(self._store.ne_df) + self._store.save() + + self._mark_all_formula_columns_dirty() + + def set_data(self, col_id: str, row_index: int, value) -> None: + """Update a single cell value. + + Updates the DataFrame, fast-access cache, and row data dict, then + marks dependent formula columns dirty. + + Args: + col_id: Column identifier. + row_index: Zero-based row index. + value: New cell value. + """ + if self._store.ne_df is None: + return + + self._store.ne_df.at[row_index, col_id] = value + + if self._store.ns_fast_access and col_id in self._store.ns_fast_access: + self._store.ns_fast_access[col_id][row_index] = value + + if self._store.ns_row_data and row_index < len(self._store.ns_row_data): + self._store.ns_row_data[row_index][col_id] = value + + self._store.save() + self._mark_changed(col_id, rows=[row_index]) + + # ------------------------------------------------------------------ + # Formula management + # ------------------------------------------------------------------ + + def register_formula(self, col_id: str, formula_text: str) -> None: + """Register or update a formula for a column with the FormulaEngine. + + Args: + col_id: Column identifier. + formula_text: DSL formula expression. 
+ """ + engine = self.get_formula_engine() + if engine is None: + return + try: + engine.set_formula(self._state.table_name, col_id, formula_text) + except Exception as e: + logger.warning("Failed to register formula for %s.%s: %s", + self._state.table_name, col_id, e) + + def remove_formula(self, col_id: str) -> None: + """Remove a formula for a column from the FormulaEngine. + + Args: + col_id: Column identifier. + """ + engine = self.get_formula_engine() + if engine is None: + return + engine.remove_formula(self._state.table_name, col_id) + + def ensure_ready(self) -> None: + """Recalculate dirty formula columns before rendering. + + Called by DataGrid.mk_body_content_page() to ensure formula columns + are up-to-date. No-op when no columns are dirty. + """ + engine = self.get_formula_engine() + if engine is None: + return + engine.recalculate_if_needed(self._state.table_name, self._store) + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + def _build_column_definitions(self, df: pd.DataFrame) -> list[ColumnDefinition]: + """Build ColumnDefinition objects from DataFrame columns. + + Args: + df: Source DataFrame with normalised column names. + + Returns: + Ordered list of ColumnDefinition objects. 
+ """ + return [ + ColumnDefinition( + col_id=make_safe_id(col_id), + col_index=col_index, + title=col_id, + type=self._infer_column_type(df[make_safe_id(col_id)].dtype), + ) + for col_index, col_id in enumerate(df.columns) + ] + + @staticmethod + def _infer_column_type(dtype) -> ColumnType: + """Infer ColumnType from a pandas dtype.""" + if pd.api.types.is_integer_dtype(dtype): + return ColumnType.Number + if pd.api.types.is_float_dtype(dtype): + return ColumnType.Number + if pd.api.types.is_bool_dtype(dtype): + return ColumnType.Bool + if pd.api.types.is_datetime64_any_dtype(dtype): + return ColumnType.Datetime + return ColumnType.Text + + @staticmethod + def _build_fast_access(df: pd.DataFrame) -> dict: + """Build ns_fast_access from a DataFrame. + + Args: + df: Source DataFrame. + + Returns: + Dict mapping col_id to numpy array, plus ROW_INDEX_ID. + """ + result = {col: df[col].to_numpy() for col in df.columns} + result[ROW_INDEX_ID] = df.index.to_numpy() + return result + + def _register_existing_formulas(self) -> None: + """Re-register all formula columns with the FormulaEngine.""" + engine = self.get_formula_engine() + if engine is None: + return + for col_def in self._state.columns: + if col_def.formula: + self.register_formula(col_def.col_id, col_def.formula) + + def _mark_changed(self, col_id: str, rows: Optional[list[int]] = None) -> None: + """Notify FormulaEngine that a column's data has changed. + + Args: + col_id: Changed column identifier. + rows: Optional list of changed row indices. None means all rows. + """ + engine = self.get_formula_engine() + if engine is None: + return + engine.mark_data_changed(self._state.table_name, col_id, rows) + + def _mark_all_formula_columns_dirty(self) -> None: + """Mark all formula columns dirty after a structural change (e.g. 
add_row).""" + engine = self.get_formula_engine() + if engine is None: + return + table = self._state.table_name + for col in self._state.columns: + if col.type == ColumnType.Formula and col.formula: + engine.mark_data_changed(table, col.col_id) diff --git a/src/myfasthtml/core/data/DataServicesManager.py b/src/myfasthtml/core/data/DataServicesManager.py new file mode 100644 index 0000000..7e6e168 --- /dev/null +++ b/src/myfasthtml/core/data/DataServicesManager.py @@ -0,0 +1,121 @@ +import logging +from typing import Optional + +from myfasthtml.core.data.DataService import DataService +from myfasthtml.core.formula.engine import FormulaEngine +from myfasthtml.core.instances import SingleInstance + +logger = logging.getLogger(__name__) + + +class DataServicesManager(SingleInstance): + """Session-scoped manager for all DataService instances. + + Owns the shared FormulaEngine and acts as the single entry point for + creating and retrieving DataService instances. Provides the resolver + callback that allows the FormulaEngine to access any table's DataStore + by table name. + + Access pattern (from any component): + manager = InstancesManager.get_by_type(session, DataServicesManager) + service = manager.get_service(grid_id) + """ + + def __init__(self, parent=None, _id: Optional[str] = None): + if not getattr(self, "_is_new_instance", False): + return + super().__init__(parent, _id) + self._services: dict[str, DataService] = {} + self._formula_engine = FormulaEngine(registry_resolver=self._resolve_store_for_table) + + # ------------------------------------------------------------------ + # Service lifecycle + # ------------------------------------------------------------------ + + def create_service(self, table_name: str, _id=None, save_state: bool = True) -> DataService: + """Create and register a new DataService for a DataGrid. + + Called by DataGridsManager when a new grid is created. + + Args: + table_name: Fully qualified table name ("namespace.name" or "name"). 
+ save_state: Whether to persist the DataService state to DB. + _id: Unique identifier of the DataGrid. + + Returns: + The newly created DataService instance. + """ + service = DataService(self, _id=_id, save_state=save_state) + service.set_table_name(table_name) + self._services[service.get_id()] = service + logger.debug(f"DataService created for '{table_name}' (grid_id={service.get_id()})") + return service + + def get_service(self, grid_id: str) -> Optional[DataService]: + """Return the DataService for a given grid_id. + + Args: + grid_id: Unique identifier of the DataGrid. + + Returns: + DataService instance, or None if not found. + """ + return self._services.get(grid_id) + + def restore_service(self, grid_id: str) -> Optional[DataService]: + """Restore a DataService from persisted state on session restart. + + Called by DataGrid on restart to re-attach its DataService. + The DataService state (columns, table_name) and DataStore (DataFrame) + are loaded from DB automatically via DbObject. + + Args: + grid_id: Unique identifier of the DataGrid. + + Returns: + The restored DataService instance. + """ + if grid_id in self._services: + return self._services[grid_id] + + service = DataService(self, _id=grid_id) + self._services[grid_id] = service + logger.debug("DataService restored for grid_id=%s", grid_id) + return service + + def remove_service(self, grid_id: str) -> None: + """Unregister and discard a DataService. + + Called by DataGridsManager when a grid is deleted. + + Args: + grid_id: Unique identifier of the DataGrid. 
+ """ + self._services.pop(grid_id, None) + logger.debug("DataService removed for grid_id=%s", grid_id) + + # ------------------------------------------------------------------ + # FormulaEngine + # ------------------------------------------------------------------ + + def get_formula_engine(self) -> FormulaEngine: + """Return the shared FormulaEngine for this session.""" + return self._formula_engine + + def _resolve_store_for_table(self, table_name: str): + """Resolve the DataStore for a given table name. + + Used by FormulaEngine as the registry_resolver callback for + cross-table formula evaluation. + + Args: + table_name: Fully qualified table name ("namespace.name"). + + Returns: + DataStore instance, or None if the table is not found. + """ + for service in self._services.values(): + if service.table_name == table_name: + return service.get_store() + logger.warning(f"DataServicesManager: table '{table_name}' not found") + return None diff --git a/src/myfasthtml/core/data/__init__.py b/src/myfasthtml/core/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/myfasthtml/core/formatting/dsl/completion/provider.py b/src/myfasthtml/core/formatting/dsl/completion/provider.py index b48fef7..2d436d3 100644 --- a/src/myfasthtml/core/formatting/dsl/completion/provider.py +++ b/src/myfasthtml/core/formatting/dsl/completion/provider.py @@ -1,89 +1,203 @@ """ Metadata provider for DataGrid formatting DSL autocompletion. -Provides access to DataGrid metadata (columns, values, row counts) -for context-aware autocompletion. +Provides access to DataGrid metadata (columns, values, row counts, presets) +for context-aware autocompletion. Delegates live data queries to +DataServicesManager and holds global formatting presets. 
""" -from typing import Any +import logging +from typing import Any, Optional +from myfasthtml.core.data.DataServicesManager import DataServicesManager from myfasthtml.core.dsl.base_provider import BaseMetadataProvider +from myfasthtml.core.formatting.presets import DEFAULT_FORMATTER_PRESETS, DEFAULT_STYLE_PRESETS +from myfasthtml.core.instances import SingleInstance, InstancesManager + +logger = logging.getLogger(__name__) -class DatagridMetadataProvider(BaseMetadataProvider): - """ - Protocol for providing DataGrid metadata to the autocompletion engine. +class DatagridMetadataProvider(SingleInstance, BaseMetadataProvider): + """Concrete session-scoped metadata provider for DataGrid DSL engines. - Implementations must provide access to: - - Available DataGrids (tables) - - Column names for each DataGrid - - Distinct values for each column - - Row count for each DataGrid - - Style and format presets + Implements BaseMetadataProvider by delegating live data queries to + DataServicesManager. Also holds the global formatting presets and the + all_tables_formats rule applied to every table. - DataGrid names follow the pattern namespace.name (multi-level namespaces). - """ - - def list_tables(self) -> list[str]: + Access pattern (from any component): + provider = InstancesManager.get_by_type(session, DatagridMetadataProvider) + + Attributes: + style_presets: Dict of named style presets available in the DSL. + formatter_presets: Dict of named formatter presets available in the DSL. + all_tables_formats: Global format rules applied to all tables. """ - Return the list of available DataGrid names. - Returns: - List of DataGrid names (e.g., ["app.orders", "app.customers"]) - """ - ... - - def list_columns(self, table_name: str) -> list[str]: - """ - Return the column names for a specific DataGrid. 
+ def __init__(self, parent=None, session: Optional[dict] = None, + _id: Optional[str] = None): + super().__init__(parent, session, _id) + with self.initializing(): + self.style_presets: dict = DEFAULT_STYLE_PRESETS.copy() + self.formatter_presets: dict = DEFAULT_FORMATTER_PRESETS.copy() + self.all_tables_formats: list = [] - Args: - table_name: The DataGrid name + # ------------------------------------------------------------------ + # Table and column metadata — delegated to DataServicesManager + # ------------------------------------------------------------------ - Returns: - List of column names (e.g., ["id", "amount", "status"]) - """ - ... - - def list_column_values(self, table_name, column_name: str) -> list[Any]: - """ - Return the distinct values for a column in the current DataGrid. + def list_tables(self) -> list[str]: + """Return the list of all registered table names. - This is used to suggest values in conditions like `value == |`. + Returns: + List of table names in "namespace.name" format. + """ + manager = self._get_data_services_manager() + if manager is None: + return [] + return [s.table_name for s in manager._services.values() if s.table_name] - Args: - column_name: The column name + def list_columns(self, table_name: str) -> list[str]: + """Return the column identifiers for a table. - Returns: - List of distinct values in the column - """ - ... - - def get_row_count(self, table_name: str) -> int: - """ - Return the number of rows in a DataGrid. + Args: + table_name: Fully qualified table name. - Used to suggest row indices for row scope and cell scope. + Returns: + List of col_id strings. + """ + service = self._get_service(table_name) + if service is None: + return [] + return [c.col_id for c in service.columns] - Args: - table_name: The DataGrid name + def list_column_values(self, table_name: str, column_name: str) -> list[Any]: + """Return the distinct values present in a column. - Returns: - Number of rows - """ - ... 
+ Args: + table_name: Fully qualified table name. + column_name: Column identifier. - def get_column_type(self, table_name: str, column_name: str): - """ - Return the type of a column. + Returns: + List of distinct values, empty list if not found. + """ + service = self._get_service(table_name) + if service is None: + return [] + store = service.get_store() + if store.ne_df is None or column_name not in store.ne_df.columns: + return [] + return store.ne_df[column_name].dropna().unique().tolist() - Used to filter suggestions based on column type. + def get_row_count(self, table_name: str) -> int: + """Return the number of rows in a table. - Args: - table_name: The DataGrid name - column_name: The column name + Args: + table_name: Fully qualified table name. - Returns: - ColumnType enum value or None if not found - """ - ... + Returns: + Row count, or 0 if not found. + """ + service = self._get_service(table_name) + if service is None: + return 0 + store = service.get_store() + return store.ns_total_rows or 0 + + def get_column_type(self, table_name: str, column_name: str): + """Return the ColumnType for a column. + + Args: + table_name: Fully qualified table name. + column_name: Column identifier. + + Returns: + ColumnType enum value, or None if not found. 
+ """ + service = self._get_service(table_name) + if service is None: + return None + for col in service.columns: + if col.col_id == column_name: + return col.type + return None + + # ------------------------------------------------------------------ + # Preset metadata — held locally + # ------------------------------------------------------------------ + + def list_style_presets(self) -> list[str]: + """Return the names of all registered style presets.""" + return list(self.style_presets.keys()) + + def list_format_presets(self) -> list[str]: + """Return the names of all registered formatter presets.""" + return list(self.formatter_presets.keys()) + + def get_style_presets(self) -> dict: + """Return the full style presets dict.""" + return self.style_presets + + def get_formatter_presets(self) -> dict: + """Return the full formatter presets dict.""" + return self.formatter_presets + + def add_style_preset(self, name: str, preset: dict) -> None: + """Add or update a named style preset. + + Args: + name: Preset name. + preset: Style definition dict. + """ + self.style_presets[name] = preset + + def add_formatter_preset(self, name: str, preset: dict) -> None: + """Add or update a named formatter preset. + + Args: + name: Preset name. + preset: Formatter definition dict. + """ + self.formatter_presets[name] = preset + + def remove_style_preset(self, name: str) -> None: + """Remove a style preset by name. + + Args: + name: Preset name to remove. + """ + self.style_presets.pop(name, None) + + def remove_formatter_preset(self, name: str) -> None: + """Remove a formatter preset by name. + + Args: + name: Preset name to remove. 
+ """ + self.formatter_presets.pop(name, None) + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + def _get_data_services_manager(self) -> Optional[DataServicesManager]: + """Return the DataServicesManager for this session.""" + return InstancesManager.get_by_type( + self._session, DataServicesManager, default=None + ) + + def _get_service(self, table_name: str): + """Return the DataService for a given table name. + + Args: + table_name: Fully qualified table name. + + Returns: + DataService instance, or None if not found. + """ + manager = self._get_data_services_manager() + if manager is None: + return None + for service in manager._services.values(): + if service.table_name == table_name: + return service + return None diff --git a/src/myfasthtml/core/formula/dsl/grammar.py b/src/myfasthtml/core/formula/dsl/grammar.py index e590a9b..b11455e 100644 --- a/src/myfasthtml/core/formula/dsl/grammar.py +++ b/src/myfasthtml/core/formula/dsl/grammar.py @@ -75,7 +75,7 @@ FORMULA_GRAMMAR = r""" where_clause: TABLE_COL_REF "=" COL_NAME // TABLE_COL_REF matches "TableName.ColumnName" (dot-separated, no spaces) - TABLE_COL_REF: /[A-Za-z_][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*/ + TABLE_COL_REF: /[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)+/ COL_NAME: /[A-Za-z_][A-Za-z0-9_ ]*/ // ==================== Functions ==================== diff --git a/src/myfasthtml/core/formula/dsl/transformer.py b/src/myfasthtml/core/formula/dsl/transformer.py index edd3fdb..49977da 100644 --- a/src/myfasthtml/core/formula/dsl/transformer.py +++ b/src/myfasthtml/core/formula/dsl/transformer.py @@ -171,16 +171,16 @@ class FormulaTransformer(Transformer): # ==================== References ==================== def cross_ref_simple(self, items): - """{ Table.Column }""" + """{ Table.Column } or { namespace.Table.Column }""" table_col = str(items[0]) - table, column = 
table_col.split(".", 1) + table, column = table_col.rsplit(".", 1) return CrossTableRef(table=table, column=column) - + def cross_ref_where(self, items): """{ Table.Column WHERE remote_table.remote_col = local_col }""" table_col = str(items[0]) where = items[1] - table, column = table_col.split(".", 1) + table, column = table_col.rsplit(".", 1) return CrossTableRef(table=table, column=column, where_clause=where) def column_ref(self, items): @@ -192,7 +192,7 @@ class FormulaTransformer(Transformer): """TABLE_COL_REF = COL_NAME""" remote_table_col = str(items[0]) local_col = str(items[1]).strip() - remote_table, remote_col = remote_table_col.split(".", 1) + remote_table, remote_col = remote_table_col.rsplit(".", 1) return WhereClause( remote_table=remote_table, remote_column=remote_col, diff --git a/src/myfasthtml/core/formula/engine.py b/src/myfasthtml/core/formula/engine.py index 9aa7af8..45536a6 100644 --- a/src/myfasthtml/core/formula/engine.py +++ b/src/myfasthtml/core/formula/engine.py @@ -12,6 +12,7 @@ from typing import Any, Callable, Optional import numpy as np +from myfasthtml.core.dsl.exceptions import DSLSyntaxError from .dataclasses import FormulaDefinition, WhereClause from .dependency_graph import DependencyGraph from .dsl.parser import get_parser @@ -43,8 +44,8 @@ def parse_formula(text: str) -> FormulaDefinition | None: parser = get_parser() tree = parser.parse(text) if tree is None: - return None - + raise DSLSyntaxError(message=f"Formula could not be parsed: '{text}'") + transformer = FormulaTransformer() formula = transformer.transform(tree) formula.source_text = text diff --git a/tests/controls/test_datagridmanager.py b/tests/controls/test_datagridmanager.py index 92375ee..0422390 100644 --- a/tests/controls/test_datagridmanager.py +++ b/tests/controls/test_datagridmanager.py @@ -190,8 +190,9 @@ class TestDataGridsManagerBehaviour: doc = datagrid_manager._state.elements[0] # Verify DataGrid is registered - tables = 
datagrid_manager._registry.get_all_tables() - assert "Untitled.Sheet1" in tables, "DataGrid should be registered as Untitled.Sheet1" + entries = datagrid_manager._registry.get_all_entries() + assert doc.datagrid_id in entries, "DataGrid should be registered by grid_id" + assert entries[doc.datagrid_id] == ("Untitled", "Sheet1"), "Registry entry should match namespace and name" # Verify DataGrid exists in InstancesManager from myfasthtml.core.instances import InstancesManager diff --git a/tests/core/data/__init__.py b/tests/core/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/data/conftest.py b/tests/core/data/conftest.py new file mode 100644 index 0000000..a36a1ce --- /dev/null +++ b/tests/core/data/conftest.py @@ -0,0 +1,44 @@ +import shutil + +import pytest +from dbengine.handlers import handlers + +from myfasthtml.core.data.DataServicesManager import DataServicesManager +from myfasthtml.core.dbengine_utils import DataFrameHandler +from myfasthtml.core.dbmanager import DbManager +from myfasthtml.core.instances import SingleInstance, InstancesManager + + +@pytest.fixture(scope="session") +def session(): + handlers.register_handler(DataFrameHandler()) + return { + "user_info": { + "id": "test_tenant_id", + "email": "test@email.com", + "username": "test user", + "role": [], + } + } + + +@pytest.fixture +def parent(session): + instance = SingleInstance(session=session, _id="test_parent_id") + return instance + + +@pytest.fixture +def db_manager(parent): + shutil.rmtree("TestDb", ignore_errors=True) + db_manager_instance = DbManager(parent, root="TestDb", auto_register=True) + + yield db_manager_instance + + shutil.rmtree("TestDb", ignore_errors=True) + InstancesManager.reset() + + +@pytest.fixture +def dsm(parent, db_manager): + return DataServicesManager(parent, parent._session) diff --git a/tests/core/data/test_dataservice.py b/tests/core/data/test_dataservice.py new file mode 100644 index 0000000..86630c9 --- /dev/null +++ 
b/tests/core/data/test_dataservice.py @@ -0,0 +1,213 @@ +"""Unit tests for DataService.""" +import pandas as pd +import pytest + +from myfasthtml.core.constants import ColumnType +from myfasthtml.core.data.ColumnDefinition import ColumnDefinition + + +class TestDataInitialisation: + """Tests for the Data initialisation section of DataService.""" + + @pytest.fixture + def service(self, dsm): + return dsm.create_service("ns.tbl", save_state=False) + + def test_i_can_load_a_dataframe(self, service): + """load_dataframe() populates the store and column definitions.""" + df = pd.DataFrame({"name": ["Alice", "Bob"], "age": [30, 25]}) + + service.load_dataframe(df) + + assert service.get_store().ne_df is not None + assert service.get_store().ns_total_rows == 2 + assert len(service.columns) == 2 + + def test_i_can_load_an_empty_dataframe(self, service): + """load_dataframe() with empty DataFrame sets total_rows to 0.""" + service.load_dataframe(pd.DataFrame()) + + assert service.get_store().ns_total_rows == 0 + assert service.columns == [] + + def test_i_can_load_dataframe_without_reinitializing_columns(self, service): + """load_dataframe(init_columns=False) preserves existing column definitions.""" + df = pd.DataFrame({"a": [1]}) + service.load_dataframe(df) + original_columns = list(service.columns) + + df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + service.load_dataframe(df2, init_columns=False) + + assert service.columns == original_columns + + def test_i_can_load_none_dataframe_without_error(self, service): + """load_dataframe(None) is a no-op and does not raise. + + Why this matters: + - Early return on None protects against uninitialized callers. + - ne_df must remain None (no side effects on the store). + """ + service.load_dataframe(None) + + assert service.get_store().ne_df is None + + def test_i_can_load_dataframe_with_column_name_normalization(self, service): + """load_dataframe() normalizes column names to safe IDs via make_safe_id. 
+ + Why this matters: + - Columns with spaces or special characters must be accessible as safe IDs. + - make_safe_id lowercases and replaces non-safe characters with underscores. + """ + df = pd.DataFrame({"First Name": ["Alice"], "Last Name": ["Smith"]}) + + service.load_dataframe(df) + + col_ids = [c.col_id for c in service.columns] + assert col_ids == ["first_name", "last_name"] + + +class TestMutations: + """Tests for the Mutations section of DataService.""" + + @pytest.fixture + def service(self, dsm): + svc = dsm.create_service("ns.mutations", save_state=False) + svc.load_dataframe(pd.DataFrame({"value": [1, 2, 3]})) + return svc + + def test_i_can_add_a_row(self, service): + """add_row() appends a row with default values and updates the caches.""" + service.add_row() + + assert service.get_store().ns_total_rows == 4 + assert len(service.get_store().ne_df) == 4 + + def test_i_can_add_a_row_with_custom_data(self, service): + """add_row() with explicit data stores the provided values.""" + service.add_row(row_data={"value": 99}) + + assert service.get_store().ne_df.iloc[-1]["value"] == 99 + + def test_i_can_set_data(self, service): + """set_data() updates the cell in the DataFrame, fast-access cache, and row data.""" + service.set_data("value", 1, 99) + + assert service.get_store().ne_df.at[1, "value"] == 99 + assert service.get_store().ns_fast_access["value"][1] == 99 + assert service.get_store().ns_row_data[1]["value"] == 99 + + @pytest.mark.parametrize("col_type, expected_default", [ + (ColumnType.Text, ""), + (ColumnType.Number, 0), + (ColumnType.Bool, False), + (ColumnType.Datetime, pd.NaT), + (ColumnType.Choice, ""), + (ColumnType.Enum, ""), + (ColumnType.RowSelection_, ""), + ]) + def test_i_can_add_column_with_correct_default_value(self, service, col_type, expected_default): + """add_column() creates a DataFrame column with the type-appropriate default value. 
+ + Why these assertions matter: + - col_id in ne_df.columns: Confirms the column is materialized in the DataFrame. + - len(columns) == 2: Confirms the column is registered in the metadata. + - default value: Each type has a specific sentinel value; wrong defaults corrupt data. + - pd.isna() for Datetime: pd.NaT does not support equality comparison. + """ + col_def = ColumnDefinition(col_id="__new__", col_index=-1, title="New Col", type=col_type) + service.add_column(col_def) + + assert col_def.col_id in service.get_store().ne_df.columns + assert len(service.columns) == 2 + actual = service.get_store().ne_df[col_def.col_id].iloc[0] + if pd.isna(expected_default): + assert pd.isna(actual) + else: + assert actual == expected_default + + @pytest.mark.parametrize("col_type", [ColumnType.Formula, ColumnType.RowIndex]) + def test_i_can_add_virtual_column_without_dataframe_column(self, service, col_type): + """add_column() with virtual types does not create a DataFrame column. + + Why these assertions matter: + - col_id not in ne_df.columns: Virtual columns are computed, not stored in the DataFrame. + - col_index == -1: Sentinel value marking virtual columns. + - len(columns) == 2: Column is registered in the state metadata despite being virtual. 
+ """ + col_def = ColumnDefinition(col_id="__new__", col_index=-1, title="Virtual", type=col_type) + service.add_column(col_def) + + assert col_def.col_id not in service.get_store().ne_df.columns + assert col_def.col_index == -1 + assert len(service.columns) == 2 + + def test_i_can_add_row_without_loaded_dataframe_without_error(self, dsm): + """add_row() is a no-op and does not raise when no DataFrame is loaded.""" + service = dsm.create_service("ns.nodf_row", save_state=False) + + service.add_row() + + assert service.get_store().ne_df is None + + def test_i_can_set_data_without_loaded_dataframe_without_error(self, dsm): + """set_data() is a no-op and does not raise when no DataFrame is loaded.""" + service = dsm.create_service("ns.nodf_set", save_state=False) + + service.set_data("x", 0, 42) + + assert service.get_store().ne_df is None + + +class TestFormulaManagement: + """Tests for the Formula management section of DataService.""" + + @pytest.fixture + def service(self, dsm): + svc = dsm.create_service("ns.formula", save_state=False) + svc.load_dataframe(pd.DataFrame({"a": [1, 2, 3]})) + return svc + + def test_i_can_get_table_name(self, service): + """table_name property returns the value set at creation.""" + assert service.table_name == "ns.formula" + + def test_i_can_update_table_name(self, service): + """set_table_name() updates the table name.""" + service.set_table_name("ns.new_name") + + assert service.table_name == "ns.new_name" + + def test_i_can_register_formula(self, service): + """register_formula() registers a formula in the shared FormulaEngine. + + Why these assertions matter: + - has_formula: Confirms the formula was registered in the engine's DAG. + - get_formula_text: Confirms the source expression is stored as-is. 
+ """ + service.register_formula("computed", "{a} + 1") + + engine = service.get_formula_engine() + assert engine.has_formula("ns.formula", "computed") + assert engine.get_formula_text("ns.formula", "computed") == "{a} + 1" + + def test_i_can_remove_formula(self, service): + """remove_formula() unregisters a formula from the FormulaEngine.""" + service.register_formula("computed", "{a} + 1") + service.remove_formula("computed") + + engine = service.get_formula_engine() + assert not engine.has_formula("ns.formula", "computed") + + def test_i_cannot_register_invalid_formula(self, service): + """register_formula() with invalid DSL syntax does not register the formula. + + Why this matters: + - parse_formula() raises DSLSyntaxError when it cannot parse the expression. + - register_formula() catches the exception to protect the caller, but the + formula must remain absent from the engine — not silently removed. + """ + service.register_formula("computed", "invalid syntax without braces") + + engine = service.get_formula_engine() + assert not engine.has_formula("ns.formula", "computed") diff --git a/tests/core/data/test_dataservice_formula_integration.py b/tests/core/data/test_dataservice_formula_integration.py new file mode 100644 index 0000000..c2cd72c --- /dev/null +++ b/tests/core/data/test_dataservice_formula_integration.py @@ -0,0 +1,210 @@ +"""Integration tests for DataService formula evaluation through DataServicesManager. + +These tests exercise the full stack: DataServicesManager owns the FormulaEngine +and provides the registry_resolver that enables cross-table formula resolution. +Each test uses real DataService instances and real DataStore objects — no fakes. 
+""" +import pytest +import pandas as pd + +from myfasthtml.core.constants import ColumnType +from myfasthtml.core.data.ColumnDefinition import ColumnDefinition + + +class TestIntraTableFormula: + """Single-table formula evaluation through the DSM/DataService stack.""" + + @pytest.fixture + def service(self, dsm): + svc = dsm.create_service("ns.sales", save_state=False) + svc.load_dataframe(pd.DataFrame({"price": [10, 20, 30], "qty": [2, 3, 4]})) + return svc + + def test_i_can_evaluate_formula_on_single_table(self, service): + """register_formula() + ensure_ready() computes the column for all rows. + + Why these assertions matter: + - ns_fast_access["total"]: ensure_ready() writes results to the cache used by rendering. + - All three rows: verifies the formula is applied to every row, not just the first. + """ + service.register_formula("total", "{price} * {qty}") + service.ensure_ready() + + result = service.get_store().ns_fast_access["total"] + assert result[0] == 20 + assert result[1] == 60 + assert result[2] == 120 + + def test_i_can_reevaluate_formula_after_data_change(self, service): + """set_data() marks dependent formula columns dirty; ensure_ready() recomputes them. + + Why these assertions matter: + - result[0] updated: confirms the dirty flag propagated and row 0 was recomputed. + - result[1] unchanged: confirms only affected rows are recomputed (no unnecessary work). + """ + service.register_formula("total", "{price} * {qty}") + service.ensure_ready() + + service.set_data("price", 0, 100) + service.ensure_ready() + + result = service.get_store().ns_fast_access["total"] + assert result[0] == 200 + assert result[1] == 60 + + +class TestCrossTableFormula: + """Cross-table formula resolution via DataServicesManager.registry_resolver. + + Table names use namespace notation (e.g. "ns.products"). The DSL grammar + now supports multiple dots in TABLE_COL_REF; the transformer splits on the + last dot to separate the table name from the column name. 
+ """ + + @pytest.fixture + def orders_service(self, dsm): + svc = dsm.create_service("ns.orders", save_state=False) + svc.load_dataframe(pd.DataFrame({"qty": [2, 3]})) + return svc + + @pytest.fixture + def products_service(self, dsm): + svc = dsm.create_service("ns.products", save_state=False) + svc.load_dataframe(pd.DataFrame({"price": [10, 20]})) + return svc + + def test_i_can_evaluate_cross_table_formula(self, orders_service, products_service): + """A formula in one service can reference a namespaced table from another service. + + Why these assertions matter: + - result[0] and result[1]: confirms the registry_resolver resolved "ns.products" + correctly and combined its data with the orders data row by row. + """ + orders_service.register_formula("total", "{ns.products.price} * {qty}") + orders_service.ensure_ready() + + result = orders_service.get_store().ns_fast_access["total"] + assert result[0] == 20 + assert result[1] == 60 + + def test_i_cannot_resolve_cross_table_formula_for_unknown_table(self, orders_service): + """A formula referencing an unregistered table resolves to None without raising. + + Why this matters: + - result is None: confirms the engine degrades gracefully when the resolver + returns None, instead of raising or producing corrupt values. + """ + orders_service.register_formula("total", "{ns.unknown_table.price} * {qty}") + orders_service.ensure_ready() + + result = orders_service.get_store().ns_fast_access["total"] + assert result[0] is None + assert result[1] is None + + +class TestCrossTableFormulaWhere: + """Cross-table formula resolution using an explicit WHERE clause. + + The WHERE clause scans the remote table for a row where remote_column == local_value, + enabling correct lookups regardless of row ordering between tables. 
+ """ + + @pytest.fixture + def orders_service(self, dsm): + svc = dsm.create_service("ns.orders", save_state=False) + svc.load_dataframe(pd.DataFrame({"product_id": [2, 1], "qty": [3, 5]})) + return svc + + @pytest.fixture + def products_service(self, dsm): + svc = dsm.create_service("ns.products", save_state=False) + svc.load_dataframe(pd.DataFrame({"product_id": [1, 2], "price": [10, 20]})) + return svc + + def test_i_can_lookup_value_with_where_clause_non_sequential(self, orders_service, products_service): + """WHERE resolves the correct remote row even when tables are not aligned by position. + + Why these assertions matter: + - result[0] == 60: order row 0 has product_id=2, products row 1 has price=20 → 20*3=60. + - result[1] == 50: order row 1 has product_id=1, products row 0 has price=10 → 10*5=50. + Row-index fallback would return 10*3=30 and 20*5=100 — both wrong. + """ + orders_service.register_formula( + "total", + "{ns.products.price where ns.products.product_id = product_id} * {qty}" + ) + orders_service.ensure_ready() + + result = orders_service.get_store().ns_fast_access["total"] + assert result[0] == 60 + assert result[1] == 50 + + + def test_i_can_lookup_returns_none_when_no_match(self, orders_service, products_service): + """WHERE returns None when the local value has no matching row in the remote table. + + Why this matters: + - result[0] is None: the test overrides row 0's product_id to 99, which has no matching row in products. + - No exception is raised: the engine must degrade gracefully on missing lookups. 
+ """ + orders_service_no_match = orders_service + orders_service_no_match.get_store().ns_fast_access["product_id"][0] = 99 + + orders_service_no_match.register_formula( + "total", + "{ns.products.price where ns.products.product_id = product_id} * {qty}" + ) + orders_service_no_match.ensure_ready() + + result = orders_service_no_match.get_store().ns_fast_access["total"] + assert result[0] is None + + +class TestFormulaLifecycle: + """End-to-end formula lifecycle: column creation, registration, and evaluation.""" + + @pytest.fixture + def service(self, dsm): + svc = dsm.create_service("ns.lifecycle", save_state=False) + svc.load_dataframe(pd.DataFrame({"a": [1, 2, 3]})) + return svc + + def test_i_can_add_formula_column_and_evaluate(self, service): + """add_column(Formula) + register_formula() + ensure_ready() produces computed values. + + Why these assertions matter: + - col_id in ns_fast_access: ensure_ready() must write the formula column into the cache. + - Values [2, 4, 6]: validates the formula expression is correctly applied to all rows. + """ + col_def = ColumnDefinition(col_id="__new__", col_index=-1, + title="Doubled", type=ColumnType.Formula, + formula="{a} * 2") + service.add_column(col_def) + service.register_formula(col_def.col_id, col_def.formula) + service.ensure_ready() + + result = service.get_store().ns_fast_access[col_def.col_id] + assert list(result) == [2, 4, 6] + + def test_i_can_evaluate_formula_after_adding_row(self, service): + """add_row() marks formula columns dirty; ensure_ready() computes the new row. + + Why these assertions matter: + - len(result) == 4: confirms the new row was appended and the cache extended. + - result[3] == 20: confirms the formula was recalculated for the new row (a=10, * 2). + - result[0] == 2: confirms existing rows are not corrupted by the recalculation. 
+ """ + col_def = ColumnDefinition(col_id="__new__", col_index=-1, + title="Doubled", type=ColumnType.Formula, + formula="{a} * 2") + service.add_column(col_def) + service.register_formula(col_def.col_id, col_def.formula) + service.ensure_ready() + + service.add_row(row_data={"a": 10}) + service.ensure_ready() + + result = service.get_store().ns_fast_access[col_def.col_id] + assert len(result) == 4 + assert result[0] == 2 + assert result[3] == 20 diff --git a/tests/core/data/test_dataservicesmanager.py b/tests/core/data/test_dataservicesmanager.py new file mode 100644 index 0000000..2b63798 --- /dev/null +++ b/tests/core/data/test_dataservicesmanager.py @@ -0,0 +1,121 @@ +"""Unit tests for DataServicesManager.""" +import pandas as pd + + +class TestDataServicesManagerServiceLifecycle: + def test_i_can_create_a_service(self, dsm): + """create_service() returns a DataService accessible by grid_id.""" + service = dsm.create_service("ns.tbl", save_state=False) + + assert service is not None + assert service.get_id() is not None + assert dsm.get_service(service.get_id()) is service + + def test_i_can_create_service_with_correct_table_name(self, dsm): + """create_service() sets the table_name on the returned DataService. + + create_service() calls service.set_table_name() internally. + This test verifies the side effect is applied before returning the service. + """ + service = dsm.create_service("ns.my_table", save_state=False) + + assert service.table_name == "ns.my_table" + + def test_i_can_create_service_forcing_the_id(self, dsm): + """create_service() uses the explicitly provided _id as the service id. + + Passing _id forces the identifier instead of relying on the default one. + This test verifies that get_id() returns exactly the forced value. 
+ """ + service = dsm.create_service("ns.my_table", _id="grid_id", save_state=False) + + assert service.get_id() == "grid_id" + + def test_i_can_get_a_service_by_grid_id(self, dsm): + """get_service() returns the correct service.""" + svc1 = dsm.create_service("ns.t1", _id="g1", save_state=False) + svc2 = dsm.create_service("ns.t2", _id="g2", save_state=False) + + assert dsm.get_service("g1") is svc1 + assert dsm.get_service("g2") is svc2 + + def test_i_cannot_get_a_nonexistent_service(self, dsm): + """get_service() returns None for unknown grid_id.""" + assert dsm.get_service("does_not_exist") is None + + def test_i_can_remove_a_service(self, dsm): + """remove_service() unregisters the service.""" + service = dsm.create_service("ns.rm", save_state=False) + dsm.remove_service(service.get_id()) + + assert dsm.get_service(service.get_id()) is None + + def test_i_can_remove_a_nonexistent_service_without_error(self, dsm): + """remove_service() on unknown grid_id does not raise.""" + dsm.remove_service("ghost") # should not raise + + def test_i_can_restore_a_service(self, dsm): + """restore_service() creates and registers a service if not already present.""" + service = dsm.restore_service("grid_restore") + + assert service is not None + assert dsm.get_service("grid_restore") is service + assert service.get_id() == "grid_restore" + + def test_i_can_restore_existing_service(self, dsm): + """restore_service() returns the existing service when already registered.""" + original = dsm.create_service("ns.e", _id="grid_exist", save_state=False) + restored = dsm.restore_service("grid_exist") + + assert restored is original + + +class TestDataServicesManagerFormulaEngine: + + def test_i_can_get_formula_engine(self, dsm): + """get_formula_engine() returns the shared FormulaEngine instance.""" + engine = dsm.get_formula_engine() + assert engine is not None + + def test_i_can_verify_shared_formula_engine(self, dsm): + """All services share the same FormulaEngine from 
DataServicesManager.""" + svc1 = dsm.create_service("ns.fe1", save_state=False) + svc2 = dsm.create_service("ns.fe2", save_state=False) + + assert svc1.get_formula_engine() is svc2.get_formula_engine() + assert svc1.get_formula_engine() is dsm.get_formula_engine() + + def test_i_can_resolve_store_by_table_name(self, dsm): + """FormulaEngine resolver finds the DataStore for a given table name.""" + service = dsm.create_service("ns.resolver", save_state=False) + df = pd.DataFrame({"a": [1, 2]}) + service.load_dataframe(df) + + store = dsm._resolve_store_for_table("ns.resolver") + + assert store is service.get_store() + + def test_i_can_resolve_correct_store_among_multiple_services(self, dsm): + """_resolve_store_for_table() identifies the right store when multiple services are registered. + + The resolver iterates over all registered services and must return the store + whose service has a matching table_name, not another service's store. + """ + svc_a = dsm.create_service("ns.table_a", save_state=False) + svc_b = dsm.create_service("ns.table_b", save_state=False) + + df = pd.DataFrame({"x": [10, 20]}) + svc_a.load_dataframe(df) + svc_b.load_dataframe(df.copy()) + + store_a = dsm._resolve_store_for_table("ns.table_a") + store_b = dsm._resolve_store_for_table("ns.table_b") + + assert store_a is svc_a.get_store() + assert store_b is svc_b.get_store() + assert store_a is not store_b + + def test_i_cannot_resolve_unknown_table(self, dsm): + """FormulaEngine resolver returns None for an unknown table name.""" + result = dsm._resolve_store_for_table("unknown.table") + assert result is None diff --git a/tests/core/test_datagrid_registry.py b/tests/core/test_datagrid_registry.py index 609229d..7782799 100644 --- a/tests/core/test_datagrid_registry.py +++ b/tests/core/test_datagrid_registry.py @@ -2,9 +2,7 @@ import shutil import pytest from dbengine.handlers import handlers -from pandas import DataFrame -from myfasthtml.controls.DataGrid import DataGrid, DatagridConf from 
myfasthtml.core.DataGridsRegistry import DataGridsRegistry, DATAGRIDS_REGISTRY_ENTRY_KEY from myfasthtml.core.dbengine_utils import DataFrameHandler from myfasthtml.core.dbmanager import DbManager @@ -31,7 +29,6 @@ def session(): @pytest.fixture def parent(session): instance = SingleInstance(session=session, _id="test_parent_id") - instance.get_formula_engine = lambda: None return instance @@ -39,76 +36,60 @@ def parent(session): def db_manager(parent): shutil.rmtree("TestDb", ignore_errors=True) db_manager_instance = DbManager(parent, root="TestDb", auto_register=True) - + yield db_manager_instance - + shutil.rmtree("TestDb", ignore_errors=True) InstancesManager.reset() -@pytest.fixture -def dg(parent): - # the table must be created - data = {"name": ["john", "jane"], "id": [1, 2]} - df = DataFrame(data) - dgc = DatagridConf("namespace", "table_name") - datagrid = DataGrid(parent, conf=dgc, save_state=True) - datagrid.init_from_dataframe(df, init_state=True) - yield datagrid - - datagrid.dispose() - - @pytest.fixture def dgr(parent, db_manager): return DataGridsRegistry(parent) -def test_entry_is_created_at_startup(db_manager, dgr, ): +def test_i_can_create_registry_with_empty_state(db_manager, dgr): + """Registry is initialised with an empty dict in DB.""" assert db_manager.exists_entry(DATAGRIDS_REGISTRY_ENTRY_KEY) assert clean_db_object(db_manager.load(DATAGRIDS_REGISTRY_ENTRY_KEY)) == {} -def test_i_can_put_a_table_in_registry(dgr): +def test_i_can_put_and_retrieve_entries(dgr): + """put() persists entries retrievable via get_all_entries().""" dgr.put("namespace", "name", "datagrid_id") dgr.put("namespace2", "name2", "datagrid_id2") - assert dgr.get_all_tables() == ["namespace.name", "namespace2.name2"] + + entries = dgr.get_all_entries() + assert "datagrid_id" in entries + assert "datagrid_id2" in entries + assert entries["datagrid_id"] == ("namespace", "name") + assert entries["datagrid_id2"] == ("namespace2", "name2") -def 
test_i_can_columns_names_for_a_table(dgr, dg): - expected = ["__row_index__", "name", "id"] if dg.get_state().row_index else ["name", "id"] - namespace, name = dg.get_settings().namespace, dg.get_settings().name - dgr.put(namespace, name, dg.get_id()) - - table_full_name = f"{namespace}.{name}" - assert dgr.get_columns(table_full_name) == expected +def test_i_can_remove_an_entry(dgr): + """remove() deletes the entry from the registry.""" + dgr.put("ns", "tbl", "grid_1") + assert "grid_1" in dgr.get_all_entries() + + dgr.remove("grid_1") + assert "grid_1" not in dgr.get_all_entries() -def test_i_can_get_columns_values(dgr, dg): - namespace, name = dg.get_settings().namespace, dg.get_settings().name - dgr.put(namespace, name, dg.get_id()) - - table_full_name = f"{namespace}.{name}" - assert dgr.get_column_values(table_full_name, "name") == ["john", "jane"] +def test_i_can_remove_nonexistent_entry_without_error(dgr): + """remove() on a missing id does not raise.""" + dgr.remove("does_not_exist") # should not raise -def test_i_can_get_row_count(dgr, dg): - namespace, name = dg.get_settings().namespace, dg.get_settings().name - dgr.put(namespace, name, dg.get_id()) - - table_full_name = f"{namespace}.{name}" - assert dgr.get_row_count(table_full_name) == 2 +def test_i_can_put_multiple_entries_and_get_all(dgr): + """get_all_entries() returns all registered grids.""" + dgr.put("ns1", "t1", "id1") + dgr.put("ns2", "t2", "id2") + dgr.put("ns3", "t3", "id3") + + entries = dgr.get_all_entries() + assert len(entries) == 3 -def test_i_can_manage_when_table_name_does_not_exist(dgr): - assert dgr.get_columns("namespace.name") == [] - assert dgr.get_row_count("namespace.name") == 0 - - -def test_i_can_manage_when_column_does_not_exist(dgr, dg): - namespace, name = dg.get_settings().namespace, dg.get_settings().name - dgr.put(namespace, name, dg.get_id()) - - table_full_name = f"{namespace}.{name}" - assert len(dgr.get_columns(table_full_name)) > 0 - assert 
dgr.get_column_values("namespace.name", "") == [] +def test_i_can_get_empty_entries_when_registry_is_empty(dgr): + """get_all_entries() returns empty dict when nothing registered.""" + assert dgr.get_all_entries() == {}