Optimized Datagrid.render()

This commit is contained in:
2026-01-10 22:54:02 +01:00
parent 47848bb2fd
commit a9eb23ad76
3 changed files with 319 additions and 52 deletions

141
benchmarks/profile_datagrid.py Executable file
View File

@@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""
DataGrid Performance Profiling Script
Generates a 1000-row DataFrame and profiles the DataGrid.render() method
to identify performance bottlenecks.
Usage:
python benchmarks/profile_datagrid.py
"""
import cProfile
import pstats
from io import StringIO
import numpy as np
import pandas as pd
from myfasthtml.controls.DataGrid import DataGrid
from myfasthtml.core.instances import SingleInstance, InstancesManager
def generate_test_dataframe(rows=1000, cols=10, seed=42):
    """Generate a reproducible test DataFrame with mixed column types.

    Ten base columns are always produced: ID, Name, Email, Age, Salary,
    Active, Score, Department, Country and Rating. When ``cols`` is larger
    than the number of base columns, ``Extra_Col_<i>`` float columns are
    appended until the frame has ``cols`` columns. When ``cols`` is smaller,
    the base columns are kept unchanged (no column is dropped).

    Args:
        rows: Number of rows to generate.
        cols: Desired total number of columns.
        seed: Seed of the random generator used for the random columns.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` rows.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by the module-level functions, but does not reseed the
    # process-wide generator as a side effect.
    rng = np.random.RandomState(seed)

    # NOTE: the order of the random draws below determines the generated
    # values; keep it stable to keep benchmark data comparable between runs.
    data = {
        'ID': range(rows),
        'Name': [f'Person_{i}' for i in range(rows)],
        'Email': [f'user{i}@example.com' for i in range(rows)],
        'Age': rng.randint(18, 80, rows),
        'Salary': rng.uniform(30000, 150000, rows),
        'Active': rng.choice([True, False], rows),
        'Score': rng.uniform(0, 100, rows),
        'Department': rng.choice(['Sales', 'Engineering', 'Marketing', 'HR'], rows),
        'Country': rng.choice(['France', 'USA', 'Germany', 'UK', 'Spain'], rows),
        'Rating': rng.uniform(1.0, 5.0, rows),
    }

    # Pad with extra float columns; the range is empty when cols <= len(data).
    for i in range(cols - len(data)):
        data[f'Extra_Col_{i}'] = rng.random_sample(rows)

    return pd.DataFrame(data)
def profile_datagrid_render(df):
    """Profile a single ``DataGrid.render()`` call for ``df``.

    The function clears ``InstancesManager.instances``, creates a root
    ``SingleInstance`` with a minimal session, builds a ``DataGrid`` under
    that root, loads ``df`` into it and then profiles only the ``render()``
    call (grid construction and data loading are not measured).

    Args:
        df: DataFrame handed to ``DataGrid.init_from_dataframe``.

    Returns:
        A ``(profiler, html_output)`` tuple: the ``cProfile.Profile`` holding
        the collected statistics and the value returned by ``render()``.
    """
    # Clear instances to start fresh
    InstancesManager.instances.clear()

    # Minimal session expected by the instances created below
    session = {
        "user_info": {
            "id": "test_tenant_id",
            "email": "test@email.com",
            "username": "test user",
            "role": [],
        }
    }

    # Root instance used as parent of the grid
    root = SingleInstance(parent=None, session=session, _id="profile-root")

    # Only the parent is passed; every other DataGrid argument keeps its default
    datagrid = DataGrid(root)
    datagrid.init_from_dataframe(df)

    profiler = cProfile.Profile()
    profiler.enable()
    try:
        html_output = datagrid.render()
    finally:
        # Always stop profiling, even when render() raises, so the profiler
        # does not stay active for the rest of the process.
        profiler.disable()

    return profiler, html_output
def print_profile_stats(profiler, top_n=30, top_n_tottime=10):
    """Print formatted profiling statistics to stdout.

    Three sections are printed:

    1. the ``top_n`` entries sorted by cumulative time,
    2. a summary made of the first report line containing
       ``'function calls'`` (skipped when no such line exists),
    3. the ``top_n_tottime`` entries sorted by ``tottime``, i.e. the time
       spent inside each function itself, excluding sub-calls.

    Args:
        profiler: Profile object accepted by ``pstats.Stats``.
        top_n: Number of entries in the cumulative-time section.
        top_n_tottime: Number of entries in the internal-time section.
    """

    def _report(sort_key, limit):
        # Render one pstats table into a string instead of stdout.
        buffer = StringIO()
        pstats.Stats(profiler, stream=buffer).sort_stats(sort_key).print_stats(limit)
        return buffer.getvalue()

    separator = "=" * 80

    print("\n" + separator)
    print(f"PROFILING RESULTS - Top {top_n} functions by cumulative time")
    print(separator + "\n")

    cumulative_report = _report('cumulative', top_n)
    print(cumulative_report)

    # The pstats header line ("N function calls in X seconds") doubles as
    # the overall summary.
    summary = next(
        (line for line in cumulative_report.split('\n') if 'function calls' in line),
        None,
    )
    if summary is not None:
        print("\n" + separator)
        print("SUMMARY")
        print(separator)
        print(summary)

    print("\n" + separator)
    # 'tottime' is the time spent in the function body only; it is not
    # "time * ncalls", so the heading names the metric explicitly.
    print(f"Top {top_n_tottime} functions by internal time (tottime, excluding sub-calls)")
    print(separator + "\n")
    print(_report('tottime', top_n_tottime))
def main():
    """Run the benchmark end to end and print the results.

    Generates the 1000 x 10 test DataFrame, reports its shape and memory
    footprint, profiles ``DataGrid.render()`` on it, prints the profiling
    tables, resets ``InstancesManager`` and finally prints follow-up hints.
    """
    print("Generating test DataFrame (1000 rows × 10 columns)...")
    frame = generate_test_dataframe(rows=1000, cols=10)
    memory_kb = frame.memory_usage(deep=True).sum() / 1024
    print(f"DataFrame shape: {frame.shape}")
    print(f"Memory usage: {memory_kb:.2f} KB\n")

    print("Profiling DataGrid.render()...")
    profiler, rendered = profile_datagrid_render(frame)
    rendered_length = len(str(rendered))
    print(f"\nHTML output length: {rendered_length} characters")

    print_profile_stats(profiler, top_n=30)

    # Drop the instances created for the profiling run
    InstancesManager.reset()

    closing_lines = (
        "\n✅ Profiling complete!",
        "\nNext steps:",
        "1. Identify the slowest functions in the 'cumulative time' section",
        "2. Look for functions called many times (high ncalls)",
        "3. Focus optimization on high cumtime + high ncalls functions",
    )
    for text in closing_lines:
        print(text)


if __name__ == "__main__":
    main()

View File

@@ -1,7 +1,10 @@
import html
import re
from functools import lru_cache
from typing import Optional
import pandas as pd
from fasthtml.common import NotStr
from fasthtml.components import *
from myfasthtml.controls.BaseCommands import BaseCommands
@@ -11,10 +14,26 @@ from myfasthtml.controls.helpers import mk
from myfasthtml.core.constants import ColumnType, ROW_INDEX_ID, FooterAggregation, DATAGRID_PAGE_SIZE, FILTER_INPUT_CID
from myfasthtml.core.dbmanager import DbObject
from myfasthtml.core.instances import MultipleInstance
from myfasthtml.core.optimized_ft import OptimizedDiv
from myfasthtml.core.utils import make_safe_id
from myfasthtml.icons.fluent import checkbox_unchecked16_regular
from myfasthtml.icons.fluent_p2 import checkbox_checked16_regular
# Pre-compiled pattern matching any one of the characters < > & " '.
# It serves as a cheap pre-check so that html.escape() is only called on
# cell values that actually contain one of these characters.
_HTML_SPECIAL_CHARS_REGEX = re.compile(r'[<>&"\']')
@lru_cache(maxsize=2)
def _mk_bool_cached(_value):
    """Return the checkbox markup for a boolean cell, memoized per value.

    A checked icon is used when ``_value`` is truthy and an unchecked icon
    otherwise. The icon is wrapped in a ``Div`` with the
    ``dt2-cell-content-checkbox`` class, converted with ``str()`` and
    returned as ``NotStr``.

    The result is cached on ``_value`` (which must therefore be hashable);
    the cache holds at most two entries, one per expected boolean value.
    """
    if _value:
        icon = checkbox_checked16_regular
    else:
        icon = checkbox_unchecked16_regular

    checkbox = Div(mk.icon(icon, can_select=False), cls="dt2-cell-content-checkbox")
    # Stringify once here so cache hits hand back ready-made markup.
    return NotStr(str(checkbox))
class DatagridState(DbObject):
def __init__(self, owner, save_state):
@@ -148,64 +167,84 @@ class DataGrid(MultipleInstance):
id=f"th_{self._id}"
)
def mk_body_cell_content(self, col_pos, row_index, col_def: DataGridColumnState):
def mk_body_cell_content(self, col_pos, row_index, col_def: DataGridColumnState, filter_keyword_lower=None):
"""
OPTIMIZED: Generate cell content with minimal object creation.
- Uses plain strings instead of Label objects when possible
- Accepts pre-computed filter_keyword_lower to avoid repeated dict lookups
- Avoids html.escape when not necessary
- Uses cached boolean HTML (_mk_bool_cached)
"""
def mk_bool(_value):
return Div(mk.icon(checkbox_checked16_regular if _value else checkbox_unchecked16_regular, can_select=False),
cls="dt2-cell-content-checkbox")
def mk_highlighted_text(value_str, css_class):
"""Return highlighted text as raw HTML string or tuple of Spans."""
if not filter_keyword_lower:
# OPTIMIZATION: Return plain HTML string instead of Label object
# Include "truncate text-sm" to match mk.label() behavior (ellipsis + font size)
return NotStr(f'<span class="{css_class} truncate text-sm">{value_str}</span>')
def mk_text(_value):
return mk.label(_value, cls="dt2-cell-content-text")
def mk_number(_value):
return mk.label(_value, cls="dt2-cell-content-number")
def process_cell_content(_value):
value_str = html.escape(str(_value))
if FILTER_INPUT_CID not in self._state.filtered or (
keyword := self._state.filtered[FILTER_INPUT_CID]) is None:
return value_str
index = value_str.lower().find(keyword.lower())
index = value_str.lower().find(filter_keyword_lower)
if index < 0:
return value_str
return NotStr(f'<span class="{css_class} truncate text-sm">{value_str}</span>')
len_keyword = len(keyword)
res = [Span(value_str[:index])] if index > 0 else []
res += [Span(value_str[index:index + len_keyword], cls="dt2-highlight-1")]
res += [Span(value_str[index + len_keyword:])] if len(value_str) > len_keyword else []
return tuple(res)
# Has highlighting - need to use Span objects
# Add "truncate text-sm" to match mk.label() behavior
len_keyword = len(filter_keyword_lower)
res = []
if index > 0:
res.append(Span(value_str[:index], cls=f"{css_class} text-sm"))
res.append(Span(value_str[index:index + len_keyword], cls=f"{css_class} text-sm dt2-highlight-1"))
if index + len_keyword < len(value_str):
res.append(Span(value_str[index + len_keyword:], cls=f"{css_class} text-sm"))
return Span(*res, cls=f"{css_class} truncate") if len(res) > 1 else res[0]
column_type = col_def.type
value = self._state.ns_fast_access[col_def.col_id][row_index]
# Boolean type - uses cached HTML (only 2 possible values)
if column_type == ColumnType.Bool:
content = mk_bool(value)
elif column_type == ColumnType.Number:
content = mk_number(process_cell_content(value))
elif column_type == ColumnType.RowIndex:
content = mk_number(row_index)
return _mk_bool_cached(value)
# RowIndex - simplest case, just return the number as plain HTML
if column_type == ColumnType.RowIndex:
return NotStr(f'<span class="dt2-cell-content-number truncate text-sm">{row_index}</span>')
# Convert value to string
value_str = str(value)
# OPTIMIZATION: Only escape if necessary (check for HTML special chars with pre-compiled regex)
if _HTML_SPECIAL_CHARS_REGEX.search(value_str):
value_str = html.escape(value_str)
# Number or Text type
if column_type == ColumnType.Number:
return mk_highlighted_text(value_str, "dt2-cell-content-number")
else:
content = mk_text(process_cell_content(value))
return mk_highlighted_text(value_str, "dt2-cell-content-text")
return content
def mk_body_cell(self, col_pos, row_index, col_def: DataGridColumnState):
def mk_body_cell(self, col_pos, row_index, col_def: DataGridColumnState, filter_keyword_lower=None):
"""
OPTIMIZED: Accepts pre-computed filter_keyword_lower to avoid repeated dict lookups.
OPTIMIZED: Uses OptimizedDiv instead of Div for faster rendering.
"""
if not col_def.usable:
return None
if not col_def.visible:
return Div(cls="dt2-col-hidden")
return OptimizedDiv(cls="dt2-col-hidden")
content = self.mk_body_cell_content(col_pos, row_index, col_def)
content = self.mk_body_cell_content(col_pos, row_index, col_def, filter_keyword_lower)
return Div(content,
return OptimizedDiv(content,
data_col=col_def.col_id,
style=f"width:{col_def.width}px;",
cls="dt2-cell")
def mk_body_content_page(self, page_index: int):
"""
OPTIMIZED: Extract filter keyword once instead of 10,000 times.
OPTIMIZED: Uses OptimizedDiv for rows instead of Div for faster rendering.
"""
df = self._df # self._get_filtered_df()
start = page_index * DATAGRID_PAGE_SIZE
end = start + DATAGRID_PAGE_SIZE
@@ -214,11 +253,16 @@ class DataGrid(MultipleInstance):
else:
last_row = None
rows = [Div(
*[self.mk_body_cell(col_pos, row_index, col_def) for col_pos, col_def in enumerate(self._state.columns)],
# OPTIMIZATION: Extract filter keyword once (was being checked 10,000 times)
filter_keyword = self._state.filtered.get(FILTER_INPUT_CID)
filter_keyword_lower = filter_keyword.lower() if filter_keyword else None
rows = [OptimizedDiv(
*[self.mk_body_cell(col_pos, row_index, col_def, filter_keyword_lower)
for col_pos, col_def in enumerate(self._state.columns)],
cls="dt2-row",
data_row=f"{row_index}",
id=f"tr_{self._id}-{row_index}",
_id=f"tr_{self._id}-{row_index}",
) for row_index in df.index[start:end]]
return rows

View File

@@ -0,0 +1,82 @@
"""
Optimized FastHTML-compatible elements that generate HTML directly.
These classes bypass FastHTML's overhead for performance-critical rendering
by generating HTML strings directly instead of creating full FastHTML objects.
"""
from functools import lru_cache
from fasthtml.common import NotStr
class OptimizedFt:
    """Lightweight FastHTML-compatible element that generates HTML directly.

    An instance stores a tag name, its children and its attributes, and
    renders them with plain string formatting in :meth:`to_html`.

    Attributes:
        tag: Tag name used for the opening and closing tags.
        children: Positional arguments given to the constructor, in order.
        attrs: Keyword arguments whose value is not ``None``, keyed by the
            HTML attribute name returned by :meth:`safe_attr`.
    """

    # Python keyword spellings that map to a differently named HTML attribute.
    ATTR_MAP = {
        "cls": "class",
        "_id": "id",
    }

    # Attribute families whose remaining underscores must become hyphens.
    _HYPHENATED_PREFIXES = ("hx-", "data-")

    def __init__(self, tag, *args, **kwargs):
        """Store the tag, the children and the non-``None`` attributes."""
        self.tag = tag
        self.children = args
        self.attrs = {self.safe_attr(k): v for k, v in kwargs.items() if v is not None}

    @staticmethod
    @lru_cache(maxsize=128)
    def safe_attr(attr_name):
        """Convert a Python attribute name to its HTML attribute name.

        ``hx_``/``data_`` become ``hx-``/``data-``; for names that then start
        with one of these prefixes every remaining underscore is converted
        as well (``hx_swap_oob`` -> ``hx-swap-oob``). The result is finally
        looked up in ``ATTR_MAP`` (``cls`` -> ``class``, ``_id`` -> ``id``).
        """
        attr_name = attr_name.replace("hx_", "hx-")
        attr_name = attr_name.replace("data_", "data-")
        if attr_name.startswith(OptimizedFt._HYPHENATED_PREFIXES):
            # htmx and data-* attribute names are hyphen separated throughout.
            attr_name = attr_name.replace("_", "-")
        return OptimizedFt.ATTR_MAP.get(attr_name, attr_name)

    @staticmethod
    def to_html_helper(item):
        """Convert a child item to its HTML string.

        Supported items: ``None`` (empty string), ``str`` (returned as-is,
        without escaping), numbers and booleans (``str()``), ``OptimizedFt``
        (``to_html()``), tuples/lists (items rendered in order and
        concatenated), ``NotStr`` (``str()``), objects exposing ``__html__``
        and objects exposing ``__ft__`` (their result is rendered
        recursively).

        Raises:
            TypeError: If ``item`` is of an unsupported type.
        """
        if item is None:
            return ""
        if isinstance(item, str):
            # Plain strings are trusted: callers must escape beforehand.
            return item
        if isinstance(item, (int, float, bool)):
            return str(item)
        if isinstance(item, OptimizedFt):
            return item.to_html()
        if isinstance(item, (tuple, list)):
            return ''.join(OptimizedFt.to_html_helper(child) for child in item)
        if isinstance(item, NotStr):
            return str(item)

        # Duck-typed fallbacks for regular FastHTML objects mixed into an
        # optimized tree (for example Span children used for highlighting).
        # NOTE(review): assumes FastHTML elements expose __html__ - confirm.
        render = getattr(item, "__html__", None)
        if callable(render):
            return str(render())
        if hasattr(item, "__ft__"):
            return OptimizedFt.to_html_helper(item.__ft__())

        raise TypeError(f"Unsupported type: {type(item)}, {item=}")

    def to_html(self):
        """Generate the HTML string ``<tag attrs>children</tag>``.

        ``False`` attributes are omitted, ``True`` attributes are emitted as
        bare names and every other value is emitted as ``name="value"``.
        A closing tag is always written.
        """
        attrs_list = []
        for k, v in self.attrs.items():
            if v is False:
                continue  # Skip False attributes
            if v is True:
                attrs_list.append(k)  # Boolean attribute
            else:
                # NOTE(review): values are written verbatim (no escaping);
                # callers must not pass values containing a double quote.
                attrs_list.append(f'{k}="{v}"')

        attrs_str = ' ' + ' '.join(attrs_list) if attrs_list else ''
        children_html = ''.join(self.to_html_helper(child) for child in self.children)

        return f'<{self.tag}{attrs_str}>{children_html}</{self.tag}>'

    def __ft__(self):
        """FastHTML compatibility - returns NotStr to avoid double escaping."""
        return NotStr(self.to_html())

    def __str__(self):
        """Return the rendered HTML string."""
        return self.to_html()
class OptimizedDiv(OptimizedFt):
    """``div`` element rendered through ``OptimizedFt``."""

    def __init__(self, *children, **attributes):
        """Create a ``div`` with the given children and attributes."""
        super().__init__("div", *children, **attributes)