def generate_test_dataframe(rows=1000, cols=10):
    """Generate a reproducible test DataFrame with mixed column types.

    Ten base columns are always generated, in this order: ``ID``, ``Name``,
    ``Email``, ``Age``, ``Salary``, ``Active``, ``Score``, ``Department``,
    ``Country``, ``Rating``.  When ``cols`` is larger than ten, float columns
    named ``Extra_Col_<i>`` are appended; when it is smaller, only the first
    ``cols`` base columns are kept, so the result always has exactly ``cols``
    columns.

    Args:
        rows: Number of rows to generate.
        cols: Number of columns in the returned frame.

    Returns:
        A ``pandas.DataFrame`` of shape ``(rows, cols)``.

    Raises:
        ValueError: If ``rows`` or ``cols`` is negative.
    """
    if rows < 0 or cols < 0:
        raise ValueError(f"rows and cols must be non-negative, got rows={rows}, cols={cols}")

    # A private legacy generator seeded with 42 keeps the data reproducible
    # without reseeding the process-wide numpy RNG as a side effect.
    rng = np.random.RandomState(42)

    # The draw order below is part of the reproducibility contract: each call
    # consumes the seeded stream in sequence.
    data = {
        'ID': range(rows),
        'Name': [f'Person_{i}' for i in range(rows)],
        'Email': [f'user{i}@example.com' for i in range(rows)],
        'Age': rng.randint(18, 80, rows),
        'Salary': rng.uniform(30000, 150000, rows),
        'Active': rng.choice([True, False], rows),
        'Score': rng.uniform(0, 100, rows),
        'Department': rng.choice(['Sales', 'Engineering', 'Marketing', 'HR'], rows),
        'Country': rng.choice(['France', 'USA', 'Germany', 'UK', 'Spain'], rows),
        'Rating': rng.uniform(1.0, 5.0, rows),
    }

    # Pad with extra float columns when more than the base set is requested.
    for i in range(cols - len(data)):
        data[f'Extra_Col_{i}'] = rng.random_sample(rows)

    frame = pd.DataFrame(data)

    # Honour a request for fewer columns than the base set.
    if cols < frame.shape[1]:
        frame = frame.iloc[:, :cols]
    return frame
def print_profile_stats(profiler, top_n=30, top_tottime=10):
    """Print formatted profiling statistics to stdout.

    Two reports are printed: the ``top_n`` entries sorted by cumulative time
    (followed by the "function calls" summary line extracted from that
    report), then the ``top_tottime`` entries sorted by ``tottime``.

    Args:
        profiler: A ``cProfile.Profile`` that has already collected data.
        top_n: Number of entries in the cumulative-time report.
        top_tottime: Number of entries in the ``tottime`` report.
    """

    def render(sort_key, limit):
        # pstats writes to a stream; capture it so the text can be scanned
        # for the summary line before being printed.
        buffer = StringIO()
        pstats.Stats(profiler, stream=buffer).sort_stats(sort_key).print_stats(limit)
        return buffer.getvalue()

    rule = "=" * 80

    print(f"\n{rule}")
    print(f"PROFILING RESULTS - Top {top_n} functions by cumulative time")
    print(f"{rule}\n")

    cumulative_report = render('cumulative', top_n)
    print(cumulative_report)

    # The "N function calls in X seconds" line carries the overall total.
    summary = next(
        (line for line in cumulative_report.split('\n') if 'function calls' in line),
        None,
    )
    if summary is not None:
        print(f"\n{rule}")
        print("SUMMARY")
        print(rule)
        print(summary)

    # tottime is the time spent inside a function itself, excluding the
    # functions it calls; it is not "time * ncalls".
    print(f"\n{rule}")
    print(f"Top {top_tottime} by internal time (tottime, excluding sub-calls)")
    print(f"{rule}\n")
    print(render('tottime', top_tottime))
Focus optimization on high cumtime + high ncalls functions") + + +if __name__ == "__main__": + main() diff --git a/src/myfasthtml/controls/DataGrid.py b/src/myfasthtml/controls/DataGrid.py index 34afd20..c33ed90 100644 --- a/src/myfasthtml/controls/DataGrid.py +++ b/src/myfasthtml/controls/DataGrid.py @@ -1,7 +1,10 @@ import html +import re +from functools import lru_cache from typing import Optional import pandas as pd +from fasthtml.common import NotStr from fasthtml.components import * from myfasthtml.controls.BaseCommands import BaseCommands @@ -11,10 +14,26 @@ from myfasthtml.controls.helpers import mk from myfasthtml.core.constants import ColumnType, ROW_INDEX_ID, FooterAggregation, DATAGRID_PAGE_SIZE, FILTER_INPUT_CID from myfasthtml.core.dbmanager import DbObject from myfasthtml.core.instances import MultipleInstance +from myfasthtml.core.optimized_ft import OptimizedDiv from myfasthtml.core.utils import make_safe_id from myfasthtml.icons.fluent import checkbox_unchecked16_regular from myfasthtml.icons.fluent_p2 import checkbox_checked16_regular +# OPTIMIZATION: Pre-compiled regex to detect HTML special characters +_HTML_SPECIAL_CHARS_REGEX = re.compile(r'[<>&"\']') + + +@lru_cache(maxsize=2) +def _mk_bool_cached(_value): + """ + OPTIMIZED: Cached boolean checkbox HTML generator. + Since there are only 2 possible values (True/False), this will only generate HTML twice. 
def mk_body_cell_content(self, col_pos, row_index, col_def: DataGridColumnState, filter_keyword_lower=None):
    """
    Build the inner HTML of one body cell.

    Every branch returns a ``NotStr`` holding ready-to-embed HTML, so the
    result can be passed to ``OptimizedDiv`` without further conversion.

    Args:
        col_pos: Position of the column; not used by this method.
        row_index: Row key; rendered as-is for RowIndex columns and used to read
            the value from ``self._state.ns_fast_access`` otherwise.
        col_def: Column definition; ``type`` selects the rendering and ``col_id``
            selects the data column.
        filter_keyword_lower: Lower-cased filter keyword. ``None`` or an empty
            string disables highlighting.

    Returns:
        ``NotStr`` with the cell content. Text and number values are HTML-escaped;
        the first case-insensitive occurrence of the keyword is wrapped in a
        ``dt2-highlight-1`` span.
    """
    column_type = col_def.type

    # RowIndex needs neither a data lookup nor escaping.
    # NOTE(review): "truncate text-sm" presumably mirrors what mk.label() adds - confirm.
    if column_type == ColumnType.RowIndex:
        return NotStr(f'<span class="dt2-cell-content-number truncate text-sm">{row_index}</span>')

    value = self._state.ns_fast_access[col_def.col_id][row_index]

    # Boolean cells reuse the cached checkbox markup.
    if column_type == ColumnType.Bool:
        return _mk_bool_cached(value)

    css_class = "dt2-cell-content-number" if column_type == ColumnType.Number else "dt2-cell-content-text"
    text = str(value)
    has_special = _HTML_SPECIAL_CHARS_REGEX.search  # cheap pre-check before html.escape

    # Search the RAW text: matching on escaped text could cut an entity such as
    # "&amp;" in half and would never match keywords containing "<" or "&".
    # NOTE(review): str.lower() can change the length of a few Unicode strings,
    # which would shift the match offsets - confirm whether that matters here.
    match_at = text.lower().find(filter_keyword_lower) if filter_keyword_lower else -1

    if match_at < 0:
        safe_text = html.escape(text) if has_special(text) else text
        return NotStr(f'<span class="{css_class} truncate text-sm">{safe_text}</span>')

    match_end = match_at + len(filter_keyword_lower)
    pieces = (
        (text[:match_at], f"{css_class} text-sm"),
        (text[match_at:match_end], f"{css_class} text-sm dt2-highlight-1"),
        (text[match_end:], f"{css_class} text-sm"),
    )

    # Escape each segment on its own so the span boundaries stay valid HTML;
    # empty leading/trailing segments are dropped.
    spans = [
        f'<span class="{piece_cls}">{html.escape(piece) if has_special(piece) else piece}</span>'
        for piece, piece_cls in pieces
        if piece
    ]

    if len(spans) == 1:
        return NotStr(spans[0])
    return NotStr(f'<span class="{css_class} truncate">{"".join(spans)}</span>')
+ """ df = self._df # self._get_filtered_df() start = page_index * DATAGRID_PAGE_SIZE end = start + DATAGRID_PAGE_SIZE @@ -213,14 +252,19 @@ class DataGrid(MultipleInstance): last_row = df.index[end - 1] else: last_row = None - - rows = [Div( - *[self.mk_body_cell(col_pos, row_index, col_def) for col_pos, col_def in enumerate(self._state.columns)], + + # OPTIMIZATION: Extract filter keyword once (was being checked 10,000 times) + filter_keyword = self._state.filtered.get(FILTER_INPUT_CID) + filter_keyword_lower = filter_keyword.lower() if filter_keyword else None + + rows = [OptimizedDiv( + *[self.mk_body_cell(col_pos, row_index, col_def, filter_keyword_lower) + for col_pos, col_def in enumerate(self._state.columns)], cls="dt2-row", data_row=f"{row_index}", - id=f"tr_{self._id}-{row_index}", + _id=f"tr_{self._id}-{row_index}", ) for row_index in df.index[start:end]] - + return rows def mk_body(self): diff --git a/src/myfasthtml/core/optimized_ft.py b/src/myfasthtml/core/optimized_ft.py new file mode 100644 index 0000000..087f9fe --- /dev/null +++ b/src/myfasthtml/core/optimized_ft.py @@ -0,0 +1,82 @@ +""" +Optimized FastHTML-compatible elements that generate HTML directly. + +These classes bypass FastHTML's overhead for performance-critical rendering +by generating HTML strings directly instead of creating full FastHTML objects. 
class OptimizedFt:
    """Lightweight FastHTML-compatible element that generates HTML directly.

    An instance stores a tag name, its children and its attributes, and renders
    them to a string with ``to_html()``.  Keyword arguments become attributes:
    names are translated by ``safe_attr`` and ``None`` values are dropped.

    Plain ``str`` children are emitted verbatim (NOT escaped), so callers must
    escape untrusted text themselves.  Attribute values are not escaped either.
    Every element is rendered with an explicit closing tag; void elements are
    not special-cased.
    """

    ATTR_MAP = {
        "cls": "class",
        "_id": "id",
    }

    def __init__(self, tag, *args, **kwargs):
        self.tag = tag
        self.children = args
        self.attrs = {self.safe_attr(k): v for k, v in kwargs.items() if v is not None}

    @staticmethod
    @lru_cache(maxsize=128)
    def safe_attr(attr_name):
        """Convert a Python keyword name to its HTML attribute name.

        ``hx_`` and ``data_`` become ``hx-`` and ``data-``; the result is then
        looked up in ``ATTR_MAP`` (``cls`` -> ``class``, ``_id`` -> ``id``).
        """
        attr_name = attr_name.replace("hx_", "hx-")
        attr_name = attr_name.replace("data_", "data-")
        return OptimizedFt.ATTR_MAP.get(attr_name, attr_name)

    @staticmethod
    def to_html_helper(item):
        """Convert one child to an HTML string.

        Supported children: ``None`` (empty string), ``str`` (verbatim),
        ``OptimizedFt``, numbers and booleans, tuples/lists (each item
        converted and concatenated), ``NotStr``, and objects exposing
        ``__ft__`` or ``__html__``.

        Raises:
            TypeError: If the child is of an unsupported type.
        """
        if item is None:
            return ""
        if isinstance(item, str):
            return item
        if isinstance(item, OptimizedFt):
            return item.to_html()
        if isinstance(item, (int, float, bool)):
            return str(item)
        if isinstance(item, (tuple, list)):
            return ''.join(OptimizedFt.to_html_helper(child) for child in item)
        if isinstance(item, NotStr):
            return str(item)
        # Interop with regular FastHTML content, which would otherwise make
        # rendering fail as soon as one such child is nested in an optimized one.
        # NOTE(review): FastHTML FT elements are expected to expose __html__ - confirm.
        if hasattr(item, "__ft__"):
            return OptimizedFt.to_html_helper(item.__ft__())
        if hasattr(item, "__html__"):
            return str(item.__html__())
        raise TypeError(f"Unsupported type: {type(item)}, {item=}")

    def to_html(self):
        """Render the element, its attributes and its children to HTML."""
        attr_parts = []
        for name, value in self.attrs.items():
            if value is False:
                continue  # a False attribute is omitted entirely
            if value is True:
                attr_parts.append(name)  # bare boolean attribute
            else:
                # NOTE: values are inserted verbatim; callers must not pass
                # values containing double quotes or untrusted text.
                attr_parts.append(f'{name}="{value}"')

        attrs_str = ' ' + ' '.join(attr_parts) if attr_parts else ''
        children_html = ''.join(self.to_html_helper(child) for child in self.children)

        # The closing tag is required: without it every element stays open and
        # following siblings end up nested inside it.
        return f'<{self.tag}{attrs_str}>{children_html}</{self.tag}>'

    def __ft__(self):
        """FastHTML compatibility - returns NotStr to avoid double escaping."""
        return NotStr(self.to_html())

    def __str__(self):
        return self.to_html()


class OptimizedDiv(OptimizedFt):
    """Optimized ``div`` element."""

    def __init__(self, *args, **kwargs):
        super().__init__("div", *args, **kwargs)