Optimized Datagrid.render()

2026-01-10 22:54:02 +01:00
parent 47848bb2fd
commit a9eb23ad76
3 changed files with 319 additions and 52 deletions
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+"""
+DataGrid Performance Profiling Script
+
+Generates a 1000-row DataFrame and profiles the DataGrid.render() method
+to identify performance bottlenecks.
+
+Usage:
+    python benchmarks/profile_datagrid.py
+"""
+
+import cProfile
+import pstats
+from io import StringIO
+
+import numpy as np
+import pandas as pd
+
+from myfasthtml.controls.DataGrid import DataGrid
+from myfasthtml.core.instances import SingleInstance, InstancesManager
+
+
+def generate_test_dataframe(rows=1000, cols=10):
+  """Generate a test DataFrame with mixed column types."""
+  np.random.seed(42)
+  
+  data = {
+      'ID': range(rows),
+      'Name': [f'Person_{i}' for i in range(rows)],
+      'Email': [f'user{i}@example.com' for i in range(rows)],
+      'Age': np.random.randint(18, 80, rows),
+      'Salary': np.random.uniform(30000, 150000, rows),
+      'Active': np.random.choice([True, False], rows),
+      'Score': np.random.uniform(0, 100, rows),
+      'Department': np.random.choice(['Sales', 'Engineering', 'Marketing', 'HR'], rows),
+      'Country': np.random.choice(['France', 'USA', 'Germany', 'UK', 'Spain'], rows),
+      'Rating': np.random.uniform(1.0, 5.0, rows),
+  }
+  
+  # Add extra columns if needed
+  for i in range(cols - len(data)):
+    data[f'Extra_Col_{i}'] = np.random.random(rows)
+  
+  return pd.DataFrame(data)
+
+
+def profile_datagrid_render(df):
+  """Profile the DataGrid render method."""
+  
+  # Clear instances to start fresh
+  InstancesManager.instances.clear()
+  
+  # Create a minimal session
+  session = {
+      "user_info": {
+          "id": "test_tenant_id",
+          "email": "test@email.com",
+          "username": "test user",
+          "role": [],
+      }
+  }
+  
+  # Create root instance as parent
+  root = SingleInstance(parent=None, session=session, _id="profile-root")
+  
+  # Create DataGrid (parent, settings, save_state, _id)
+  datagrid = DataGrid(root)
+  datagrid.init_from_dataframe(df)
+  
+  # Profile the render call
+  profiler = cProfile.Profile()
+  profiler.enable()
+  
+  # Execute render
+  html_output = datagrid.render()
+  
+  profiler.disable()
+  
+  return profiler, html_output
+
+
+def print_profile_stats(profiler, top_n=30):
+  """Print formatted profiling statistics."""
+  s = StringIO()
+  stats = pstats.Stats(profiler, stream=s)
+  
+  print("\n" + "=" * 80)
+  print("PROFILING RESULTS - Top {} functions by cumulative time".format(top_n))
+  print("=" * 80 + "\n")
+  
+  stats.sort_stats('cumulative')
+  stats.print_stats(top_n)
+  
+  output = s.getvalue()
+  print(output)
+  
+  # Extract total time
+  for line in output.split('\n'):
+    if 'function calls' in line:
+      print("\n" + "=" * 80)
+      print("SUMMARY")
+      print("=" * 80)
+      print(line)
+      break
+  
+  print("\n" + "=" * 80)
+  print("Top 10 by total time spent (time * ncalls)")
+  print("=" * 80 + "\n")
+  
+  s = StringIO()
+  stats = pstats.Stats(profiler, stream=s)
+  stats.sort_stats('tottime')
+  stats.print_stats(10)
+  print(s.getvalue())
+
+
+def main():
+  print("Generating test DataFrame (1000 rows × 10 columns)...")
+  df = generate_test_dataframe(rows=1000, cols=10)
+  print(f"DataFrame shape: {df.shape}")
+  print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024:.2f} KB\n")
+  
+  print("Profiling DataGrid.render()...")
+  profiler, html_output = profile_datagrid_render(df)
+  
+  print(f"\nHTML output length: {len(str(html_output))} characters")
+  
+  print_profile_stats(profiler, top_n=30)
+  
+  # Clean up instances
+  InstancesManager.reset()
+  
+  print("\n✅ Profiling complete!")
+  print("\nNext steps:")
+  print("1. Identify the slowest functions in the 'cumulative time' section")
+  print("2. Look for functions called many times (high ncalls)")
+  print("3. Focus optimization on high cumtime + high ncalls functions")
+
+
+if __name__ == "__main__":
+  main()
@@ -1,7 +1,10 @@
 import html
+import re
+from functools import lru_cache
 from typing import Optional

 import pandas as pd
+from fasthtml.common import NotStr
 from fasthtml.components import *

 from myfasthtml.controls.BaseCommands import BaseCommands
@@ -11,10 +14,26 @@ from myfasthtml.controls.helpers import mk
 from myfasthtml.core.constants import ColumnType, ROW_INDEX_ID, FooterAggregation, DATAGRID_PAGE_SIZE, FILTER_INPUT_CID
 from myfasthtml.core.dbmanager import DbObject
 from myfasthtml.core.instances import MultipleInstance
+from myfasthtml.core.optimized_ft import OptimizedDiv
 from myfasthtml.core.utils import make_safe_id
 from myfasthtml.icons.fluent import checkbox_unchecked16_regular
 from myfasthtml.icons.fluent_p2 import checkbox_checked16_regular

+# OPTIMIZATION: Pre-compiled regex to detect HTML special characters
+_HTML_SPECIAL_CHARS_REGEX = re.compile(r'[<>&"\']')
+
+
+@lru_cache(maxsize=2)
+def _mk_bool_cached(_value):
+    """
+    OPTIMIZED: Cached boolean checkbox HTML generator.
+    Since there are only 2 possible values (True/False), this will only generate HTML twice.
+    """
+    return NotStr(str(
+        Div(mk.icon(checkbox_checked16_regular if _value else checkbox_unchecked16_regular, can_select=False),
+            cls="dt2-cell-content-checkbox")
+    ))
+

 class DatagridState(DbObject):
  def __init__(self, owner, save_state):
@@ -148,64 +167,84 @@ class DataGrid(MultipleInstance):
      id=f"th_{self._id}"
    )
  
-  def mk_body_cell_content(self, col_pos, row_index, col_def: DataGridColumnState):
+  def mk_body_cell_content(self, col_pos, row_index, col_def: DataGridColumnState, filter_keyword_lower=None):
+    """
+    OPTIMIZED: Generate cell content with minimal object creation.
+    - Uses plain strings instead of Label objects when possible
+    - Accepts pre-computed filter_keyword_lower to avoid repeated dict lookups
+    - Avoids html.escape when not necessary
+    - Uses cached boolean HTML (_mk_bool_cached)
+    """

-    def mk_bool(_value):
-      return Div(mk.icon(checkbox_checked16_regular if _value else checkbox_unchecked16_regular, can_select=False),
-                 cls="dt2-cell-content-checkbox")
+    def mk_highlighted_text(value_str, css_class):
+      """Return highlighted text as raw HTML string or tuple of Spans."""
+      if not filter_keyword_lower:
+        # OPTIMIZATION: Return plain HTML string instead of Label object
+        # Include "truncate text-sm" to match mk.label() behavior (ellipsis + font size)
+        return NotStr(f'<span class="{css_class} truncate text-sm">{value_str}</span>')

-    def mk_text(_value):
-      return mk.label(_value, cls="dt2-cell-content-text")
-    
-    def mk_number(_value):
-      return mk.label(_value, cls="dt2-cell-content-number")
-    
-    def process_cell_content(_value):
-      value_str = html.escape(str(_value))
-      
-      if FILTER_INPUT_CID not in self._state.filtered or (
-          keyword := self._state.filtered[FILTER_INPUT_CID]) is None:
-        return value_str
-      
-      index = value_str.lower().find(keyword.lower())
+      index = value_str.lower().find(filter_keyword_lower)
      if index < 0:
-        return value_str
+        return NotStr(f'<span class="{css_class} truncate text-sm">{value_str}</span>')

-      len_keyword = len(keyword)
-      res = [Span(value_str[:index])] if index > 0 else []
-      res += [Span(value_str[index:index + len_keyword], cls="dt2-highlight-1")]
-      res += [Span(value_str[index + len_keyword:])] if len(value_str) > len_keyword else []
-      return tuple(res)
+      # Has highlighting - need to use Span objects
+      # Add "truncate text-sm" to match mk.label() behavior
+      len_keyword = len(filter_keyword_lower)
+      res = []
+      if index > 0:
+        res.append(Span(value_str[:index], cls=f"{css_class} text-sm"))
+      res.append(Span(value_str[index:index + len_keyword], cls=f"{css_class} text-sm dt2-highlight-1"))
+      if index + len_keyword < len(value_str):
+        res.append(Span(value_str[index + len_keyword:], cls=f"{css_class} text-sm"))
+      return Span(*res, cls=f"{css_class} truncate") if len(res) > 1 else res[0]

    column_type = col_def.type
    value = self._state.ns_fast_access[col_def.col_id][row_index]

+    # Boolean type - uses cached HTML (only 2 possible values)
    if column_type == ColumnType.Bool:
-      content = mk_bool(value)
-    elif column_type == ColumnType.Number:
-      content = mk_number(process_cell_content(value))
-    elif column_type == ColumnType.RowIndex:
-      content = mk_number(row_index)
+      return _mk_bool_cached(value)
+
+    # RowIndex - simplest case, just return the number as plain HTML
+    if column_type == ColumnType.RowIndex:
+      return NotStr(f'<span class="dt2-cell-content-number truncate text-sm">{row_index}</span>')
+
+    # Convert value to string
+    value_str = str(value)
+
+    # OPTIMIZATION: Only escape if necessary (check for HTML special chars with pre-compiled regex)
+    if _HTML_SPECIAL_CHARS_REGEX.search(value_str):
+      value_str = html.escape(value_str)
+
+    # Number or Text type
+    if column_type == ColumnType.Number:
+      return mk_highlighted_text(value_str, "dt2-cell-content-number")
    else:
-      content = mk_text(process_cell_content(value))
+      return mk_highlighted_text(value_str, "dt2-cell-content-text")
  
-    return content
-  
-  def mk_body_cell(self, col_pos, row_index, col_def: DataGridColumnState):
+  def mk_body_cell(self, col_pos, row_index, col_def: DataGridColumnState, filter_keyword_lower=None):
+    """
+    Build the cell element for one (row, column) position.
+
+    OPTIMIZED: Accepts pre-computed filter_keyword_lower to avoid repeated dict lookups.
+    OPTIMIZED: Uses OptimizedDiv instead of Div for faster rendering.
+
+    Returns None when the column is not usable, an empty "dt2-col-hidden"
+    OptimizedDiv when the column is not visible, and otherwise a "dt2-cell"
+    OptimizedDiv wrapping the content produced by mk_body_cell_content().
+    """
     if not col_def.usable:
       return None
 
     if not col_def.visible:
-      return Div(cls="dt2-col-hidden")
+      return OptimizedDiv(cls="dt2-col-hidden")
 
-    content = self.mk_body_cell_content(col_pos, row_index, col_def)
+    content = self.mk_body_cell_content(col_pos, row_index, col_def, filter_keyword_lower)
 
-    return Div(content,
+    return OptimizedDiv(content,
                         data_col=col_def.col_id,
                         style=f"width:{col_def.width}px;",
                         cls="dt2-cell")
  
  def mk_body_content_page(self, page_index: int):
+    """
+    OPTIMIZED: Extract filter keyword once instead of 10,000 times.
+    OPTIMIZED: Uses OptimizedDiv for rows instead of Div for faster rendering.
+    """
    df = self._df  # self._get_filtered_df()
    start = page_index * DATAGRID_PAGE_SIZE
    end = start + DATAGRID_PAGE_SIZE
@@ -214,11 +253,16 @@ class DataGrid(MultipleInstance):
    else:
      last_row = None

-    rows = [Div(
-      *[self.mk_body_cell(col_pos, row_index, col_def) for col_pos, col_def in enumerate(self._state.columns)],
+    # OPTIMIZATION: Extract filter keyword once (was being checked 10,000 times)
+    filter_keyword = self._state.filtered.get(FILTER_INPUT_CID)
+    filter_keyword_lower = filter_keyword.lower() if filter_keyword else None
+
+    rows = [OptimizedDiv(
+      *[self.mk_body_cell(col_pos, row_index, col_def, filter_keyword_lower)
+        for col_pos, col_def in enumerate(self._state.columns)],
      cls="dt2-row",
      data_row=f"{row_index}",
-      id=f"tr_{self._id}-{row_index}",
+      _id=f"tr_{self._id}-{row_index}",
    ) for row_index in df.index[start:end]]

    return rows
@@ -0,0 +1,82 @@
+"""
+Optimized FastHTML-compatible elements that generate HTML directly.
+
+These classes bypass FastHTML's overhead for performance-critical rendering
+by generating HTML strings directly instead of creating full FastHTML objects.
+"""
+
+from functools import lru_cache
+
+from fasthtml.common import NotStr
+
+
+class OptimizedFt:
+    """Lightweight FastHTML-compatible element that generates HTML directly."""
+
+    ATTR_MAP = {
+        "cls": "class",
+        "_id": "id",
+    }
+
+    def __init__(self, tag, *args, **kwargs):
+        self.tag = tag
+        self.children = args
+        self.attrs = {self.safe_attr(k): v for k, v in kwargs.items() if v is not None}
+
+    @staticmethod
+    @lru_cache(maxsize=128)
+    def safe_attr(attr_name):
+        """Convert Python attribute names to HTML attribute names."""
+        attr_name = attr_name.replace("hx_", "hx-")
+        attr_name = attr_name.replace("data_", "data-")
+        return OptimizedFt.ATTR_MAP.get(attr_name, attr_name)
+
+    @staticmethod
+    def to_html_helper(item):
+        """Convert any item to HTML string."""
+        if item is None:
+            return ""
+        elif isinstance(item, str):
+            return item
+        elif isinstance(item, (int, float, bool)):
+            return str(item)
+        elif isinstance(item, OptimizedFt):
+            return item.to_html()
+        elif isinstance(item, NotStr):
+            return str(item)
+        else:
+            raise Exception(f"Unsupported type: {type(item)}, {item=}")
+
+    def to_html(self):
+        """Generate HTML string."""
+        # Build attributes
+        attrs_list = []
+        for k, v in self.attrs.items():
+            if v is False:
+                continue  # Skip False attributes
+            if v is True:
+                attrs_list.append(k)  # Boolean attribute
+            else:
+                # No need to escape v since we control the values (width, IDs, etc.)
+                attrs_list.append(f'{k}="{v}"')
+
+        attrs_str = ' ' + ' '.join(attrs_list) if attrs_list else ''
+
+        # Build children HTML
+        children_html = ''.join(self.to_html_helper(child) for child in self.children)
+
+        return f'<{self.tag}{attrs_str}>{children_html}</{self.tag}>'
+
+    def __ft__(self):
+        """FastHTML compatibility - returns NotStr to avoid double escaping."""
+        return NotStr(self.to_html())
+
+    def __str__(self):
+        return self.to_html()
+
+
+class OptimizedDiv(OptimizedFt):
+    """Optimized Div element: an OptimizedFt whose tag is fixed to "div"."""
+
+    def __init__(self, *args, **kwargs):
+        # Positional arguments become children, keyword arguments become attributes
+        super().__init__("div", *args, **kwargs)