#!/usr/bin/env python3
"""
DataGrid Performance Profiling Script

Generates a 1000-row DataFrame and profiles the DataGrid.render() method
to identify performance bottlenecks.

Usage:
    python benchmarks/profile_datagrid.py
"""

import cProfile
import pstats
from io import StringIO

import numpy as np
import pandas as pd

from myfasthtml.controls.DataGrid import DataGrid
from myfasthtml.core.instances import SingleInstance, InstancesManager


def generate_test_dataframe(rows=1000, cols=10):
    """Generate a test DataFrame with mixed column types.

    Args:
        rows: Number of rows to generate.
        cols: Requested number of columns. The ten base columns are always
            present, so values below 10 have no effect; for larger values,
            float columns named ``Extra_Col_<i>`` are appended until the
            frame has ``cols`` columns.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` rows and ``max(cols, 10)`` columns.
    """
    # Reseed NumPy's global RNG so every run produces the same data.
    np.random.seed(42)

    data = {
        'ID': range(rows),
        'Name': [f'Person_{i}' for i in range(rows)],
        'Email': [f'user{i}@example.com' for i in range(rows)],
        'Age': np.random.randint(18, 80, rows),
        'Salary': np.random.uniform(30000, 150000, rows),
        'Active': np.random.choice([True, False], rows),
        'Score': np.random.uniform(0, 100, rows),
        'Department': np.random.choice(['Sales', 'Engineering', 'Marketing', 'HR'], rows),
        'Country': np.random.choice(['France', 'USA', 'Germany', 'UK', 'Spain'], rows),
        'Rating': np.random.uniform(1.0, 5.0, rows),
    }

    # Add extra columns if needed. The range is computed once, before any
    # extra column is inserted, and is empty when cols <= len(data).
    for i in range(cols - len(data)):
        data[f'Extra_Col_{i}'] = np.random.random(rows)

    return pd.DataFrame(data)


def profile_datagrid_render(df):
    """Profile a single ``DataGrid.render()`` call for the given DataFrame.

    Only the ``render()`` call is measured; instance setup and
    ``init_from_dataframe`` run before the profiler is enabled.

    Args:
        df: DataFrame loaded into the grid via ``init_from_dataframe``.

    Returns:
        A ``(profiler, html_output)`` tuple: the ``cProfile.Profile`` holding
        the collected data and whatever ``render()`` returned.

    Raises:
        Any exception raised by ``render()`` propagates to the caller; the
        profiler is disabled before it does.
    """
    # Clear instances to start fresh
    InstancesManager.instances.clear()

    # Create a minimal session
    session = {
        "user_info": {
            "id": "test_tenant_id",
            "email": "test@email.com",
            "username": "test user",
            "role": [],
        }
    }

    # Create root instance as parent
    root = SingleInstance(parent=None, session=session, _id="profile-root")

    # Create DataGrid (parent, settings, save_state, _id)
    datagrid = DataGrid(root)
    datagrid.init_from_dataframe(df)

    # Profile the render call. The finally clause guarantees profiling is
    # switched off even when render() fails, so it cannot stay active while
    # the exception unwinds through the caller.
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        html_output = datagrid.render()
    finally:
        profiler.disable()

    return profiler, html_output


def _format_stats(profiler, sort_key, limit):
    """Return the pstats report for ``profiler`` sorted by ``sort_key``, capped at ``limit`` entries."""
    buffer = StringIO()
    stats = pstats.Stats(profiler, stream=buffer)
    stats.sort_stats(sort_key)
    stats.print_stats(limit)
    return buffer.getvalue()


def print_profile_stats(profiler, top_n=30):
    """Print formatted profiling statistics.

    Three sections are written to stdout: the top ``top_n`` functions by
    cumulative time, a summary repeating the report's "function calls" line
    (when one is present), and the top 10 functions by ``tottime``.

    Args:
        profiler: A ``cProfile.Profile`` that has already collected data.
        top_n: Number of entries shown in the cumulative-time section.
    """
    rule = "=" * 80

    print("\n" + rule)
    print(f"PROFILING RESULTS - Top {top_n} functions by cumulative time")
    print(rule + "\n")

    cumulative_report = _format_stats(profiler, 'cumulative', top_n)
    print(cumulative_report)

    # Extract total time: pstats puts the call count and elapsed seconds on
    # the header line containing "function calls".
    summary_line = next(
        (line for line in cumulative_report.split('\n') if 'function calls' in line),
        None,
    )
    if summary_line is not None:
        print("\n" + rule)
        print("SUMMARY")
        print(rule)
        print(summary_line)

    print("\n" + rule)
    # 'tottime' is the time spent inside the function itself, excluding calls
    # to sub-functions; it is not a per-call time multiplied by ncalls.
    print("Top 10 by total time spent (tottime, excluding sub-calls)")
    print(rule + "\n")

    print(_format_stats(profiler, 'tottime', 10))


def main():
    """Generate the test data, profile ``DataGrid.render()`` and print the report."""
    rows, cols = 1000, 10

    print(f"Generating test DataFrame ({rows} rows × {cols} columns)...")
    df = generate_test_dataframe(rows=rows, cols=cols)
    print(f"DataFrame shape: {df.shape}")
    print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024:.2f} KB\n")

    print("Profiling DataGrid.render()...")
    try:
        profiler, html_output = profile_datagrid_render(df)

        print(f"\nHTML output length: {len(str(html_output))} characters")

        print_profile_stats(profiler, top_n=30)
    finally:
        # Clean up instances, including when profiling or reporting fails.
        InstancesManager.reset()

    print("\n✅ Profiling complete!")
    print("\nNext steps:")
    print("1. Identify the slowest functions in the 'cumulative time' section")
    print("2. Look for functions called many times (high ncalls)")
    print("3. Focus optimization on high cumtime + high ncalls functions")


if __name__ == "__main__":
    main()