MyFastHtml/benchmarks/profile_datagrid.py

#!/usr/bin/env python3
"""
DataGrid Performance Profiling Script

Generates a 1000-row DataFrame and profiles the DataGrid.render() method
to identify performance bottlenecks.

Usage:
    python benchmarks/profile_datagrid.py
"""

import cProfile
import pstats
from io import StringIO

import numpy as np
import pandas as pd

from myfasthtml.controls.DataGrid import DataGrid
from myfasthtml.core.instances import SingleInstance, InstancesManager


def generate_test_dataframe(rows=1000, cols=10):
  """Generate a deterministic test DataFrame with mixed column types.

  The frame always contains ten base columns: ID, Name, Email, Age, Salary,
  Active, Score, Department, Country and Rating. When ``cols`` is larger than
  that, float columns named ``Extra_Col_<i>`` are appended until the frame
  has ``cols`` columns. A ``cols`` smaller than the base set does not drop
  any column.

  Args:
    rows: Number of rows to generate.
    cols: Desired total number of columns (effective minimum is ten).

  Returns:
    A ``pandas.DataFrame`` with ``rows`` rows; identical on every call with
    the same arguments.
  """
  # A private RandomState seeded with 42 draws the same values as seeding the
  # module-level generator would, but leaves the caller's np.random state
  # untouched.
  rng = np.random.RandomState(42)

  data = {
    'ID': range(rows),
    'Name': [f'Person_{i}' for i in range(rows)],
    'Email': [f'user{i}@example.com' for i in range(rows)],
    'Age': rng.randint(18, 80, rows),
    'Salary': rng.uniform(30000, 150000, rows),
    'Active': rng.choice([True, False], rows),
    'Score': rng.uniform(0, 100, rows),
    'Department': rng.choice(['Sales', 'Engineering', 'Marketing', 'HR'], rows),
    'Country': rng.choice(['France', 'USA', 'Germany', 'UK', 'Spain'], rows),
    'Rating': rng.uniform(1.0, 5.0, rows),
  }

  # Pad with random float columns; the range is empty when cols <= len(data).
  for i in range(cols - len(data)):
    data[f'Extra_Col_{i}'] = rng.random_sample(rows)

  return pd.DataFrame(data)


def profile_datagrid_render(df):
  """Profile a single ``DataGrid.render()`` call for the given DataFrame.

  Clears ``InstancesManager.instances``, builds a root ``SingleInstance``
  with a minimal session, creates a ``DataGrid`` under it and loads ``df``
  through ``init_from_dataframe``. Only the ``render()`` call itself runs
  under the profiler; the setup above is not measured.

  Args:
    df: DataFrame handed to ``DataGrid.init_from_dataframe``.

  Returns:
    A ``(profiler, html_output)`` tuple: the ``cProfile.Profile`` holding the
    collected data and whatever ``render()`` returned.

  Raises:
    Whatever ``render()`` raises; the profiler is disabled before the
    exception propagates.
  """
  # Clear instances to start fresh
  InstancesManager.instances.clear()

  # Minimal session payload used to construct the root instance.
  session = {
    "user_info": {
      "id": "test_tenant_id",
      "email": "test@email.com",
      "username": "test user",
      "role": [],
    }
  }

  # Root instance acts as the parent of the grid.
  root = SingleInstance(parent=None, session=session, _id="profile-root")

  # Create DataGrid (parent, settings, save_state, _id)
  datagrid = DataGrid(root)
  datagrid.init_from_dataframe(df)

  profiler = cProfile.Profile()
  profiler.enable()
  try:
    html_output = datagrid.render()
  finally:
    # Always stop profiling, even if render() fails, so an enabled profiler
    # is never left behind in the calling process.
    profiler.disable()

  return profiler, html_output


def _render_stats(profiler, sort_key, limit):
  """Return the pstats report for ``profiler`` sorted by ``sort_key``, capped at ``limit`` entries."""
  buffer = StringIO()
  pstats.Stats(profiler, stream=buffer).sort_stats(sort_key).print_stats(limit)
  return buffer.getvalue()


def print_profile_stats(profiler, top_n=30, top_tottime=10):
  """Print formatted profiling statistics to stdout.

  Emits three sections: the ``top_n`` entries sorted by cumulative time, a
  SUMMARY section repeating the "function calls" line of that report (when
  present), and the ``top_tottime`` entries sorted by ``tottime``.

  Args:
    profiler: A ``cProfile.Profile`` that has already collected data.
    top_n: Number of entries shown in the cumulative-time report.
    top_tottime: Number of entries shown in the tottime report.
  """
  separator = "=" * 80

  print(f"\n{separator}")
  print(f"PROFILING RESULTS - Top {top_n} functions by cumulative time")
  print(f"{separator}\n")

  cumulative_report = _render_stats(profiler, 'cumulative', top_n)
  print(cumulative_report)

  # The pstats header line carrying the call count and total run time.
  summary = next(
    (line for line in cumulative_report.split('\n') if 'function calls' in line),
    None,
  )
  if summary is not None:
    print(f"\n{separator}")
    print("SUMMARY")
    print(separator)
    print(summary)

  # tottime is the time spent inside a function itself, excluding calls to
  # sub-functions; it is not "time * ncalls".
  print(f"\n{separator}")
  print(f"Top {top_tottime} functions by total time (tottime: own time, excluding sub-calls)")
  print(f"{separator}\n")
  print(_render_stats(profiler, 'tottime', top_tottime))


def main():
  """Run the full profiling session and print the report.

  Builds the 1000 x 10 test DataFrame, profiles one ``DataGrid.render()``
  call on it, prints the rendered output length and the profiling
  statistics, then resets ``InstancesManager`` and prints follow-up hints.
  """
  print("Generating test DataFrame (1000 rows × 10 columns)...")
  df = generate_test_dataframe(rows=1000, cols=10)
  print(f"DataFrame shape: {df.shape}")
  print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024:.2f} KB\n")

  try:
    print("Profiling DataGrid.render()...")
    profiler, html_output = profile_datagrid_render(df)

    print(f"\nHTML output length: {len(str(html_output))} characters")

    print_profile_stats(profiler, top_n=30)
  finally:
    # Clean up instances even when profiling or reporting fails, so a caller
    # importing main() is not left with instances from a failed run.
    InstancesManager.reset()

  print("\n✅ Profiling complete!")
  print("\nNext steps:")
  print("1. Identify the slowest functions in the 'cumulative time' section")
  print("2. Look for functions called many times (high ncalls)")
  print("3. Focus optimization on high cumtime + high ncalls functions")

# Run the profiling session only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  main()