Added DataServicesManager and DataService

This commit is contained in:
2026-02-27 21:10:11 +01:00
parent efbc5a59ff
commit 0a766581ed
16 changed files with 1465 additions and 126 deletions

View File

@@ -0,0 +1,37 @@
from dataclasses import dataclass, field, replace
from typing import Optional

from myfasthtml.core.constants import ColumnType
@dataclass
class ColumnDefinition:
    """Data semantics of a DataGrid column.

    Holds the structural and computational properties of a column.
    Does not contain any UI-related attributes (width, visibility, formatting).
    Those are stored in ColumnUiState within DatagridState.

    Attributes:
        col_id: Unique identifier for the column. Cannot be changed after creation.
        col_index: Index of the column in the DataFrame. -1 for virtual columns
            (Formula, RowIndex).
        title: Display title of the column. May be None when no title is set.
        type: Column data type, determines rendering and mutation behaviour.
        formula: DSL expression for ColumnType.Formula columns. Empty string otherwise.
    """

    col_id: str
    col_index: int
    # The default is None, so the annotation has to admit None as well.
    title: Optional[str] = None
    type: ColumnType = ColumnType.Text
    formula: str = ""

    def copy(self) -> "ColumnDefinition":
        """Return a shallow copy of this definition.

        Built with dataclasses.replace so that every declared field is
        carried over; a hand-written field list would silently drop any
        field added to the dataclass later.

        Returns:
            A new instance whose fields reference the same values.
        """
        return replace(self)

View File

@@ -0,0 +1,380 @@
import logging
from typing import Optional
import numpy as np
import pandas as pd
from myfasthtml.core.constants import ColumnType, ROW_INDEX_ID
from myfasthtml.core.data.ColumnDefinition import ColumnDefinition
from myfasthtml.core.dbmanager import DbObject
from myfasthtml.core.instances import MultipleInstance
from myfasthtml.core.utils import make_safe_id, make_unique_safe_id
logger = logging.getLogger(__name__)
# Value written into a cell when a column or row is created without explicit
# data, keyed by ColumnType. Types that are not listed here fall back to ""
# at the lookup sites (they call .get(col_type, "")).
_COLUMN_TYPE_DEFAULTS = {
    ColumnType.Number: 0,
    ColumnType.Text: "",
    ColumnType.Bool: False,
    ColumnType.Datetime: pd.NaT,  # pandas "not a time" marker for missing datetimes
}
class DataStore(DbObject):
    """Persistent container for the tabular data of one DataGrid.

    Keeps the DataFrame together with the caches derived from it. The class
    itself performs no mutation logic; DataService fills and updates every
    attribute.

    Attributes:
        ne_df: The pandas DataFrame. Source of truth for non-formula columns.
        ns_fast_access: Dict mapping col_id to a numpy array, used for direct
            per-column lookup (FormulaEngine, rendering).
        ns_row_data: List of row dicts, used by FormattingEngine for rule
            evaluation.
        ns_total_rows: Cached total row count.
    """

    def __init__(self, owner, save_state: bool = True):
        """Create an empty store registered under "<owner id>#store".

        Args:
            owner: Owning instance; its get_id() is used to build the name.
            save_state: Forwarded to DbObject.
        """
        # NOTE(review): the source lost its indentation; the attribute
        # assignments are assumed to belong inside the initializing() block.
        with self.initializing():
            store_name = f"{owner.get_id()}#store"
            super().__init__(owner, name=store_name, save_state=save_state)
            # Everything starts empty until DataService.load_dataframe() runs.
            self.ne_df = None
            self.ns_fast_access = None
            self.ns_row_data = None
            self.ns_total_rows = None
class DataServiceState(DbObject):
    """Persistent state of a DataService.

    Holds the column definitions and the table name associated with the
    DataGrid.

    Attributes:
        columns: Ordered list of column data definitions.
        table_name: Fully qualified table name used by FormulaEngine
            (format: "namespace.name" or "name").
    """

    def __init__(self, owner, save_state: bool = True):
        """Create an empty state object registered under the name "#state".

        Args:
            owner: Owning instance, forwarded to DbObject.
            save_state: Forwarded to DbObject.
        """
        # NOTE(review): unlike DataStore, the name is not prefixed with the
        # owner id - confirm that DbObject namespaces "#state" per owner.
        # NOTE(review): the source lost its indentation; the attribute
        # assignments are assumed to belong inside the initializing() block.
        with self.initializing():
            state_name = "#state"
            super().__init__(owner, name=state_name, save_state=save_state)
            self.columns: list[ColumnDefinition] = list()
            self.table_name: str = ""
class DataService(MultipleInstance):
    """Data companion to DataGrid.

    Owns the DataStore and the list of ColumnDefinition objects for one
    DataGrid. All data mutations go through this class. The shared
    FormulaEngine is reached through the parent (see get_formula_engine()).

    This class can exist and operate independently of any rendering component.

    Attributes:
        _state: Persistent state (columns, table_name).
        _store: Persistent storage (DataFrame, caches).
    """

    def __init__(self, parent, _id: Optional[str] = None, save_state: bool = True):
        """Create the service together with its state and store objects.

        Args:
            parent: Owning instance, forwarded to MultipleInstance.
            _id: Optional identifier, forwarded to MultipleInstance.
            save_state: Forwarded to DataServiceState and DataStore.
        """
        super().__init__(parent, _id=_id)
        self._state = DataServiceState(self, save_state=save_state)
        self._store = DataStore(self, save_state=save_state)

    @property
    def columns(self) -> list[ColumnDefinition]:
        """Return the list of column definitions."""
        return self._state.columns

    @property
    def table_name(self) -> str:
        """Return the fully qualified table name used by FormulaEngine."""
        return self._state.table_name

    def set_table_name(self, table_name: str) -> None:
        """Update the table name (e.g. after a rename)."""
        self._state.table_name = table_name

    def get_store(self) -> DataStore:
        """Return the underlying DataStore."""
        return self._store

    def get_formula_engine(self):
        """Return the FormulaEngine exposed by the parent.

        NOTE(review): relies on self._parent, presumably set by
        MultipleInstance - confirm.
        """
        return self._parent.get_formula_engine()

    # ------------------------------------------------------------------
    # Data initialisation
    # ------------------------------------------------------------------

    def load_dataframe(self, df: pd.DataFrame, init_columns: bool = True) -> None:
        """Load a DataFrame into the store and initialise caches.

        The DataFrame is stored as-is (no copy) and its column labels are
        rewritten in place with make_safe_id, so the caller's object is
        modified.

        Args:
            df: Source DataFrame. Does nothing when None.
            init_columns: When True, rebuild the ColumnDefinition list from
                the DataFrame columns and save the state.
        """
        if df is None:
            return

        df.columns = df.columns.map(make_safe_id)
        self._store.ne_df = df

        if init_columns:
            self._state.columns = self._build_column_definitions(df)
            self._state.save()

        self._store.ns_fast_access = self._build_fast_access(df)
        self._store.ns_row_data = df.to_dict(orient="records")
        self._store.ns_total_rows = len(df)
        self._store.save()

        # Column definitions that already carry a formula are (re-)registered.
        self._register_existing_formulas()

    # ------------------------------------------------------------------
    # Mutations
    # ------------------------------------------------------------------

    def add_column(self, col_def: ColumnDefinition) -> None:
        """Add a new column to the DataGrid data layer.

        Assigns a unique safe col_id from the title. For Formula and RowIndex
        columns, no DataFrame column is created. For all other types, a column
        with a type-appropriate default value is added to the DataFrame.

        Args:
            col_def: Column definition. col_id and col_index are set by this
                method.
        """
        col_def.col_id = make_unique_safe_id(
            col_def.title, [c.col_id for c in self._state.columns]
        )

        if col_def.type == ColumnType.Formula:
            # Virtual column: only the definition is recorded.
            col_def.col_index = -1
            self._state.columns.append(col_def)
            self._state.save()
            return

        if col_def.type == ColumnType.RowIndex:
            # Virtual column backed by the DataFrame index.
            col_def.col_index = -1
            self._state.columns.append(col_def)
            if self._store.ne_df is not None:
                self._store.ns_fast_access[col_def.col_id] = (
                    self._store.ne_df.index.to_numpy()
                )
            self._state.save()
            self._store.save()
            return

        default_value = _COLUMN_TYPE_DEFAULTS.get(col_def.type, "")
        # The new column is appended, so its index is the current column count.
        col_def.col_index = (
            len(self._store.ne_df.columns) if self._store.ne_df is not None else 0
        )
        self._state.columns.append(col_def)

        if self._store.ne_df is not None:
            self._store.ne_df[col_def.col_id] = default_value
            self._store.ns_fast_access[col_def.col_id] = (
                self._store.ne_df[col_def.col_id].to_numpy()
            )
            for row_dict in self._store.ns_row_data:
                row_dict[col_def.col_id] = default_value

        self._state.save()
        self._store.save()
        self._mark_changed(col_def.col_id)

    def add_row(self, row_data: Optional[dict] = None) -> None:
        """Append a new row with incremental cache updates.

        The row starts from type-appropriate defaults for every non-virtual
        column; values in row_data override those defaults. Filling the gaps
        keeps every per-column cache the same length as the DataFrame even
        when the caller supplies only some columns. The caller's dict is not
        modified. Formula columns are marked dirty afterwards.

        Args:
            row_data: Optional dict of {col_id: value}. Missing columns get
                their type-appropriate default.
        """
        if self._store.ne_df is None:
            return

        new_index = len(self._store.ne_df)
        row = self._default_row_values(new_index)
        if row_data is not None:
            row.update(row_data)

        self._store.ne_df.loc[new_index] = row

        fast_access = self._store.ns_fast_access
        for col_id, value in row.items():
            if col_id in fast_access:
                fast_access[col_id] = np.append(fast_access[col_id], value)
            else:
                fast_access[col_id] = np.array([value])
        # _build_fast_access() stores the DataFrame index under ROW_INDEX_ID;
        # refresh it so the cache also covers the appended row.
        fast_access[ROW_INDEX_ID] = self._store.ne_df.index.to_numpy()

        self._store.ns_row_data.append(row)
        self._store.ns_total_rows = len(self._store.ne_df)
        self._store.save()
        self._mark_all_formula_columns_dirty()

    def set_data(self, col_id: str, row_index: int, value) -> None:
        """Update a single cell value.

        Updates the DataFrame, fast-access cache, and row data dict, then
        notifies the FormulaEngine of the change.

        Args:
            col_id: Column identifier.
            row_index: Row label in the DataFrame, also used as the position
                in the caches.
            value: New cell value.
        """
        if self._store.ne_df is None:
            return

        self._store.ne_df.at[row_index, col_id] = value
        if self._store.ns_fast_access and col_id in self._store.ns_fast_access:
            self._store.ns_fast_access[col_id][row_index] = value
        if self._store.ns_row_data and row_index < len(self._store.ns_row_data):
            self._store.ns_row_data[row_index][col_id] = value

        self._store.save()
        self._mark_changed(col_id, rows=[row_index])

    # ------------------------------------------------------------------
    # Formula management
    # ------------------------------------------------------------------

    def register_formula(self, col_id: str, formula_text: str) -> None:
        """Register or update a formula for a column with the FormulaEngine.

        Failures are logged as warnings and not propagated.

        Args:
            col_id: Column identifier.
            formula_text: DSL formula expression.
        """
        engine = self.get_formula_engine()
        if engine is None:
            return
        try:
            engine.set_formula(self._state.table_name, col_id, formula_text)
        except Exception as e:
            # Deliberately best-effort: a formula that cannot be registered
            # must not break the caller.
            logger.warning("Failed to register formula for %s.%s: %s",
                           self._state.table_name, col_id, e)

    def remove_formula(self, col_id: str) -> None:
        """Remove a formula for a column from the FormulaEngine.

        Args:
            col_id: Column identifier.
        """
        engine = self.get_formula_engine()
        if engine is None:
            return
        engine.remove_formula(self._state.table_name, col_id)

    def ensure_ready(self) -> None:
        """Ask the FormulaEngine to recalculate this table if needed.

        Intended to be called before rendering so that formula columns are
        up-to-date. Does nothing when no engine is available.
        """
        engine = self.get_formula_engine()
        if engine is None:
            return
        engine.recalculate_if_needed(self._state.table_name, self._store)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _default_row_values(self, new_index: int) -> dict:
        """Build the default {col_id: value} mapping for a new row.

        Formula and RowSelection_ columns are skipped, RowIndex columns
        receive new_index, every other column receives its type default.

        Args:
            new_index: Index the new row will get.

        Returns:
            Dict of default values keyed by col_id.
        """
        values = {}
        for col in self._state.columns:
            if col.type in (ColumnType.Formula, ColumnType.RowSelection_):
                continue
            if col.type == ColumnType.RowIndex:
                values[col.col_id] = new_index
            else:
                values[col.col_id] = _COLUMN_TYPE_DEFAULTS.get(col.type, "")
        return values

    def _build_column_definitions(self, df: pd.DataFrame) -> list[ColumnDefinition]:
        """Build ColumnDefinition objects from DataFrame columns.

        Args:
            df: Source DataFrame with normalised column names.

        Returns:
            Ordered list of ColumnDefinition objects.
        """
        definitions = []
        for col_index, col_id in enumerate(df.columns):
            safe_id = make_safe_id(col_id)
            definitions.append(
                ColumnDefinition(
                    col_id=safe_id,
                    col_index=col_index,
                    title=col_id,
                    type=self._infer_column_type(df[safe_id].dtype),
                )
            )
        return definitions

    @staticmethod
    def _infer_column_type(dtype) -> ColumnType:
        """Infer ColumnType from a pandas dtype.

        Integer and float dtypes map to Number, bool to Bool, any datetime64
        dtype to Datetime, and everything else to Text.
        """
        if pd.api.types.is_integer_dtype(dtype) or pd.api.types.is_float_dtype(dtype):
            return ColumnType.Number
        if pd.api.types.is_bool_dtype(dtype):
            return ColumnType.Bool
        if pd.api.types.is_datetime64_any_dtype(dtype):
            return ColumnType.Datetime
        return ColumnType.Text

    @staticmethod
    def _build_fast_access(df: pd.DataFrame) -> dict:
        """Build ns_fast_access from a DataFrame.

        Args:
            df: Source DataFrame.

        Returns:
            Dict mapping col_id to numpy array, plus the DataFrame index
            under ROW_INDEX_ID.
        """
        result = {col: df[col].to_numpy() for col in df.columns}
        result[ROW_INDEX_ID] = df.index.to_numpy()
        return result

    def _register_existing_formulas(self) -> None:
        """Re-register every column that carries a formula with the FormulaEngine."""
        engine = self.get_formula_engine()
        if engine is None:
            return
        for col_def in self._state.columns:
            if col_def.formula:
                self.register_formula(col_def.col_id, col_def.formula)

    def _mark_changed(self, col_id: str, rows: Optional[list[int]] = None) -> None:
        """Notify FormulaEngine that a column's data has changed.

        Args:
            col_id: Changed column identifier.
            rows: Optional list of changed row indices, passed through to the
                engine unchanged.
        """
        engine = self.get_formula_engine()
        if engine is None:
            return
        engine.mark_data_changed(self._state.table_name, col_id, rows)

    def _mark_all_formula_columns_dirty(self) -> None:
        """Mark all formula columns dirty after a structural change (e.g. add_row)."""
        engine = self.get_formula_engine()
        if engine is None:
            return
        table = self._state.table_name
        for col in self._state.columns:
            if col.type == ColumnType.Formula and col.formula:
                engine.mark_data_changed(table, col.col_id)

View File

@@ -0,0 +1,121 @@
import logging
from typing import Optional
from myfasthtml.core.data.DataService import DataService
from myfasthtml.core.formula.engine import FormulaEngine
from myfasthtml.core.instances import SingleInstance
logger = logging.getLogger(__name__)
class DataServicesManager(SingleInstance):
    """Session-scoped manager for all DataService instances.

    Owns the shared FormulaEngine and acts as the single entry point for
    creating and retrieving DataService instances. Provides the resolver
    callback that allows the FormulaEngine to access any table's DataStore
    by table name.

    Access pattern (from any component):
        manager = InstancesManager.get_by_type(session, DataServicesManager)
        service = manager.get_service(grid_id)
    """

    def __init__(self, parent=None, _id: Optional[str] = None):
        """Initialise the registry and the shared FormulaEngine.

        Args:
            parent: Forwarded to SingleInstance.
            _id: Optional identifier, forwarded to SingleInstance.
        """
        # Skip initialisation unless the instance is flagged as new
        # (presumably set by the SingleInstance machinery - confirm).
        if not getattr(self, "_is_new_instance", False):
            return
        # Pass _id by keyword, as DataService does for its own base class.
        # NOTE(review): another SingleInstance subclass calls
        # super().__init__(parent, session, _id), so a positional second
        # argument would land in the session slot - confirm the signature.
        super().__init__(parent, _id=_id)
        self._services: dict[str, DataService] = {}
        self._formula_engine = FormulaEngine(registry_resolver=self._resolve_store_for_table)

    # ------------------------------------------------------------------
    # Service lifecycle
    # ------------------------------------------------------------------

    def create_service(self, table_name: str, _id=None, save_state: bool = True) -> DataService:
        """Create and register a new DataService for a DataGrid.

        The service is stored under its own get_id(); an existing entry with
        the same id is replaced.

        Args:
            table_name: Fully qualified table name ("namespace.name" or "name").
            _id: Unique identifier of the DataGrid.
            save_state: Whether to persist the DataService state to DB.

        Returns:
            The newly created DataService instance.
        """
        service = DataService(self, _id=_id, save_state=save_state)
        service.set_table_name(table_name)
        self._services[service.get_id()] = service
        # Lazy %-style arguments, consistent with the other log calls here.
        logger.debug("DataService created for '%s' (grid_id=%s)",
                     table_name, service.get_id())
        return service

    def get_service(self, grid_id: str) -> Optional[DataService]:
        """Return the DataService for a given grid_id.

        Args:
            grid_id: Unique identifier of the DataGrid.

        Returns:
            DataService instance, or None if not found.
        """
        return self._services.get(grid_id)

    def restore_service(self, grid_id: str) -> Optional[DataService]:
        """Return the DataService for grid_id, creating it when not registered.

        An already registered service is returned unchanged. Otherwise a new
        DataService is constructed with this id and registered; reloading of
        its persisted state is presumably handled by its DbObject-based
        state and store - confirm.

        Args:
            grid_id: Unique identifier of the DataGrid.

        Returns:
            The registered or newly created DataService instance.
        """
        if grid_id in self._services:
            return self._services[grid_id]
        service = DataService(self, _id=grid_id)
        self._services[grid_id] = service
        logger.debug("DataService restored for grid_id=%s", grid_id)
        return service

    def remove_service(self, grid_id: str) -> None:
        """Unregister and discard a DataService.

        Unknown ids are ignored.

        Args:
            grid_id: Unique identifier of the DataGrid.
        """
        self._services.pop(grid_id, None)
        logger.debug("DataService removed for grid_id=%s", grid_id)

    # ------------------------------------------------------------------
    # FormulaEngine
    # ------------------------------------------------------------------

    def get_formula_engine(self) -> FormulaEngine:
        """Return the shared FormulaEngine for this session."""
        return self._formula_engine

    def _resolve_store_for_table(self, table_name: str):
        """Resolve the DataStore for a given table name.

        Passed to FormulaEngine as the registry_resolver callback. Performs a
        linear scan over the registered services and logs a warning when no
        service matches.

        Args:
            table_name: Fully qualified table name ("namespace.name").

        Returns:
            DataStore instance, or None if the table is not found.
        """
        for service in self._services.values():
            if service.table_name == table_name:
                return service.get_store()
        logger.warning("DataServicesManager: table '%s' not found", table_name)
        return None

View File

View File

@@ -1,89 +1,203 @@
"""
Metadata provider for DataGrid formatting DSL autocompletion.
Provides access to DataGrid metadata (columns, values, row counts)
for context-aware autocompletion.
Provides access to DataGrid metadata (columns, values, row counts, presets)
for context-aware autocompletion. Delegates live data queries to
DataServicesManager and holds global formatting presets.
"""
from typing import Any
import logging
from typing import Any, Optional
from myfasthtml.core.data.DataServicesManager import DataServicesManager
from myfasthtml.core.dsl.base_provider import BaseMetadataProvider
from myfasthtml.core.formatting.presets import DEFAULT_FORMATTER_PRESETS, DEFAULT_STYLE_PRESETS
from myfasthtml.core.instances import SingleInstance, InstancesManager
logger = logging.getLogger(__name__)
class DatagridMetadataProvider(BaseMetadataProvider):
"""
Protocol for providing DataGrid metadata to the autocompletion engine.
class DatagridMetadataProvider(SingleInstance, BaseMetadataProvider):
"""Concrete session-scoped metadata provider for DataGrid DSL engines.
Implementations must provide access to:
- Available DataGrids (tables)
- Column names for each DataGrid
- Distinct values for each column
- Row count for each DataGrid
- Style and format presets
Implements BaseMetadataProvider by delegating live data queries to
DataServicesManager. Also holds the global formatting presets and the
all_tables_formats rule applied to every table.
DataGrid names follow the pattern namespace.name (multi-level namespaces).
"""
def list_tables(self) -> list[str]:
Access pattern (from any component):
provider = InstancesManager.get_by_type(session, DatagridMetadataProvider)
Attributes:
style_presets: Dict of named style presets available in the DSL.
formatter_presets: Dict of named formatter presets available in the DSL.
all_tables_formats: Global format rules applied to all tables.
"""
Return the list of available DataGrid names.
Returns:
List of DataGrid names (e.g., ["app.orders", "app.customers"])
"""
...
def list_columns(self, table_name: str) -> list[str]:
"""
Return the column names for a specific DataGrid.
def __init__(self, parent=None, session: Optional[dict] = None,
             _id: Optional[str] = None):
    """Initialise the provider with copies of the default presets.

    Args:
        parent: Forwarded to the base initialiser.
        session: Forwarded to the base initialiser.
        _id: Forwarded to the base initialiser.
    """
    super().__init__(parent, session, _id)
    # Attributes are assigned inside the initializing() context.
    with self.initializing():
        # .copy() gives this instance its own preset containers
        # (a shallow copy - assumes dict-like defaults; TODO confirm).
        self.style_presets: dict = DEFAULT_STYLE_PRESETS.copy()
        self.formatter_presets: dict = DEFAULT_FORMATTER_PRESETS.copy()
        # Starts empty.
        self.all_tables_formats: list = []
Args:
table_name: The DataGrid name
# ------------------------------------------------------------------
# Table and column metadata — delegated to DataServicesManager
# ------------------------------------------------------------------
Returns:
List of column names (e.g., ["id", "amount", "status"])
"""
...
def list_column_values(self, table_name, column_name: str) -> list[Any]:
"""
Return the distinct values for a column in the current DataGrid.
def list_tables(self) -> list[str]:
    """Return the names of all registered tables.

    Services whose table name is empty are left out.

    Returns:
        List of table names, or an empty list when no DataServicesManager
        is available.
    """
    manager = self._get_data_services_manager()
    if manager is None:
        return []
    table_names = (service.table_name for service in manager._services.values())
    return [name for name in table_names if name]
Args:
column_name: The column name
def list_columns(self, table_name: str) -> list[str]:
"""Return the column identifiers for a table.
Returns:
List of distinct values in the column
"""
...
def get_row_count(self, table_name: str) -> int:
"""
Return the number of rows in a DataGrid.
Args:
table_name: Fully qualified table name.
Used to suggest row indices for row scope and cell scope.
Returns:
List of col_id strings.
"""
service = self._get_service(table_name)
if service is None:
return []
return [c.col_id for c in service.columns]
Args:
table_name: The DataGrid name
def list_column_values(self, table_name: str, column_name: str) -> list[Any]:
"""Return the distinct values present in a column.
Returns:
Number of rows
"""
...
Args:
table_name: Fully qualified table name.
column_name: Column identifier.
def get_column_type(self, table_name: str, column_name: str):
"""
Return the type of a column.
Returns:
List of distinct values, empty list if not found.
"""
service = self._get_service(table_name)
if service is None:
return []
store = service.get_store()
if store.ne_df is None or column_name not in store.ne_df.columns:
return []
return store.ne_df[column_name].dropna().unique().tolist()
Used to filter suggestions based on column type.
def get_row_count(self, table_name: str) -> int:
"""Return the number of rows in a table.
Args:
table_name: The DataGrid name
column_name: The column name
Args:
table_name: Fully qualified table name.
Returns:
ColumnType enum value or None if not found
"""
...
Returns:
Row count, or 0 if not found.
"""
service = self._get_service(table_name)
if service is None:
return 0
store = service.get_store()
return store.ns_total_rows or 0
def get_column_type(self, table_name: str, column_name: str):
    """Look up the type of a column.

    Args:
        table_name: Fully qualified table name.
        column_name: Column identifier (matched against col_id).

    Returns:
        The type of the first column whose col_id matches, or None when the
        table or the column is not found.
    """
    service = self._get_service(table_name)
    if service is None:
        return None
    matching_types = (col.type for col in service.columns if col.col_id == column_name)
    return next(matching_types, None)
# ------------------------------------------------------------------
# Preset metadata — held locally
# ------------------------------------------------------------------
def list_style_presets(self) -> list[str]:
    """Return the names of the registered style presets as a new list."""
    # Iterating a dict yields its keys.
    return [*self.style_presets]
def list_format_presets(self) -> list[str]:
    """Return the names of the registered formatter presets as a new list."""
    # Iterating a dict yields its keys.
    return [*self.formatter_presets]
def get_style_presets(self) -> dict:
    """Return the full style presets dict.

    The stored object itself is returned (not a copy), so changes made by
    the caller are visible to this provider.
    """
    return self.style_presets
def get_formatter_presets(self) -> dict:
    """Return the full formatter presets dict.

    The stored object itself is returned (not a copy), so changes made by
    the caller are visible to this provider.
    """
    return self.formatter_presets
def add_style_preset(self, name: str, preset: dict) -> None:
    """Register a style preset under the given name.

    An existing preset with the same name is replaced.

    Args:
        name: Preset name.
        preset: Style definition dict.
    """
    self.style_presets.update({name: preset})
def add_formatter_preset(self, name: str, preset: dict) -> None:
    """Register a formatter preset under the given name.

    An existing preset with the same name is replaced.

    Args:
        name: Preset name.
        preset: Formatter definition dict.
    """
    self.formatter_presets.update({name: preset})
def remove_style_preset(self, name: str) -> None:
    """Delete a style preset; unknown names are ignored.

    Args:
        name: Preset name to remove.
    """
    try:
        del self.style_presets[name]
    except KeyError:
        # Nothing registered under this name.
        pass
def remove_formatter_preset(self, name: str) -> None:
    """Delete a formatter preset; unknown names are ignored.

    Args:
        name: Preset name to remove.
    """
    try:
        del self.formatter_presets[name]
    except KeyError:
        # Nothing registered under this name.
        pass
# ------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------
def _get_data_services_manager(self) -> Optional[DataServicesManager]:
    """Return the DataServicesManager for this session.

    Returns:
        The result of InstancesManager.get_by_type for self._session, with
        None passed as the default.
    """
    return InstancesManager.get_by_type(
        self._session, DataServicesManager, default=None
    )
def _get_service(self, table_name: str):
"""Return the DataService for a given table name.
Args:
table_name: Fully qualified table name.
Returns:
DataService instance, or None if not found.
"""
manager = self._get_data_services_manager()
if manager is None:
return None
for service in manager._services.values():
if service.table_name == table_name:
return service
return None

View File

@@ -75,7 +75,7 @@ FORMULA_GRAMMAR = r"""
where_clause: TABLE_COL_REF "=" COL_NAME
// TABLE_COL_REF matches "TableName.ColumnName" (dot-separated, no spaces)
TABLE_COL_REF: /[A-Za-z_][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*/
TABLE_COL_REF: /[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)+/
COL_NAME: /[A-Za-z_][A-Za-z0-9_ ]*/
// ==================== Functions ====================

View File

@@ -171,16 +171,16 @@ class FormulaTransformer(Transformer):
# ==================== References ====================
def cross_ref_simple(self, items):
    """{ Table.Column } or { namespace.Table.Column }

    Args:
        items: Parsed children; items[0] is the dotted reference.

    Returns:
        CrossTableRef with the part before the last dot as table and the
        part after it as column.
    """
    table_col = str(items[0])
    # Split on the LAST dot so that the table part may itself contain dots.
    table, column = table_col.rsplit(".", 1)
    return CrossTableRef(table=table, column=column)
def cross_ref_where(self, items):
    """{ Table.Column WHERE remote_table.remote_col = local_col }

    Args:
        items: Parsed children; items[0] is the dotted reference and
            items[1] the where clause.

    Returns:
        CrossTableRef with the part before the last dot as table, the part
        after it as column, and items[1] as where_clause.
    """
    table_col = str(items[0])
    where = items[1]
    # Split on the LAST dot so that the table part may itself contain dots.
    table, column = table_col.rsplit(".", 1)
    return CrossTableRef(table=table, column=column, where_clause=where)
def column_ref(self, items):
@@ -192,7 +192,7 @@ class FormulaTransformer(Transformer):
"""TABLE_COL_REF = COL_NAME"""
remote_table_col = str(items[0])
local_col = str(items[1]).strip()
remote_table, remote_col = remote_table_col.split(".", 1)
remote_table, remote_col = remote_table_col.rsplit(".", 1)
return WhereClause(
remote_table=remote_table,
remote_column=remote_col,

View File

@@ -12,6 +12,7 @@ from typing import Any, Callable, Optional
import numpy as np
from myfasthtml.core.dsl.exceptions import DSLSyntaxError
from .dataclasses import FormulaDefinition, WhereClause
from .dependency_graph import DependencyGraph
from .dsl.parser import get_parser
@@ -43,8 +44,8 @@ def parse_formula(text: str) -> FormulaDefinition | None:
parser = get_parser()
tree = parser.parse(text)
if tree is None:
return None
raise DSLSyntaxError(message=f"Formula could not be parsed: '{text}'")
transformer = FormulaTransformer()
formula = transformer.transform(tree)
formula.source_text = text