Files
MyFastHtml/benchmarks/profile_datagrid.py

142 lines
3.7 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
DataGrid Performance Profiling Script
Generates a 1000-row DataFrame and profiles the DataGrid.render() method
to identify performance bottlenecks.
Usage:
python benchmarks/profile_datagrid.py
"""
import cProfile
import pstats
from io import StringIO
import numpy as np
import pandas as pd
from myfasthtml.controls.DataGrid import DataGrid
from myfasthtml.core.instances import SingleInstance, InstancesManager
def generate_test_dataframe(rows: int = 1000, cols: int = 10, seed: int = 42) -> pd.DataFrame:
    """Generate a reproducible test DataFrame with mixed column types.

    Ten base columns are always built, in this order: ``ID``, ``Name``,
    ``Email``, ``Age``, ``Salary``, ``Active``, ``Score``, ``Department``,
    ``Country`` and ``Rating``. The result is then sized to exactly ``cols``
    columns.

    Args:
        rows: Number of rows to generate.
        cols: Number of columns in the result. When larger than the number of
            base columns, ``Extra_Col_<i>`` columns of random floats are
            appended; when smaller, only the first ``cols`` base columns are
            kept.
        seed: Value passed to ``np.random.seed`` so that repeated runs
            produce the same data.

    Returns:
        A DataFrame of shape ``(rows, cols)``.

    Raises:
        ValueError: If ``cols`` is negative.
    """
    if cols < 0:
        raise ValueError(f"cols must be non-negative, got {cols}")

    # NOTE: this seeds NumPy's *global* RNG, exactly as the original code did,
    # so the generated values stay identical for the default arguments.
    np.random.seed(seed)
    data = {
        'ID': range(rows),
        'Name': [f'Person_{i}' for i in range(rows)],
        'Email': [f'user{i}@example.com' for i in range(rows)],
        'Age': np.random.randint(18, 80, rows),
        'Salary': np.random.uniform(30000, 150000, rows),
        'Active': np.random.choice([True, False], rows),
        'Score': np.random.uniform(0, 100, rows),
        'Department': np.random.choice(['Sales', 'Engineering', 'Marketing', 'HR'], rows),
        'Country': np.random.choice(['France', 'USA', 'Germany', 'UK', 'Spain'], rows),
        'Rating': np.random.uniform(1.0, 5.0, rows),
    }

    # Pad with random float columns when more columns than the base set are
    # requested (the range is empty otherwise).
    for i in range(cols - len(data)):
        data[f'Extra_Col_{i}'] = np.random.random(rows)

    frame = pd.DataFrame(data)

    # A ``cols`` below the base-column count used to be silently ignored;
    # trim so the returned frame always has exactly ``cols`` columns.
    return frame.iloc[:, :cols]
def profile_datagrid_render(df):
    """Profile a single ``DataGrid.render()`` call for the given DataFrame.

    Only the ``render()`` call itself is measured: the instance setup and
    ``init_from_dataframe`` run before the profiler is enabled.

    Args:
        df: DataFrame loaded into the grid through ``init_from_dataframe``.

    Returns:
        A ``(profiler, html_output)`` tuple: the ``cProfile.Profile`` that
        recorded the call, and whatever ``render()`` returned.
    """
    # Clear instances to start fresh
    InstancesManager.instances.clear()

    # Create a minimal session
    session = {
        "user_info": {
            "id": "test_tenant_id",
            "email": "test@email.com",
            "username": "test user",
            "role": [],
        }
    }

    # Create root instance as parent
    root = SingleInstance(parent=None, session=session, _id="profile-root")

    # Create DataGrid. The original note lists the constructor arguments as
    # (parent, settings, save_state, _id); only the parent is supplied here.
    datagrid = DataGrid(root)
    datagrid.init_from_dataframe(df)

    # Profile the render call
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        html_output = datagrid.render()
    finally:
        # Stop collecting even when render() raises, so the profiler is never
        # left enabled while the exception propagates.
        profiler.disable()

    return profiler, html_output
def print_profile_stats(profiler, top_n=30, top_n_tottime=10):
    """Print formatted profiling statistics to stdout.

    Three sections are printed in order:

    1. The ``top_n`` entries sorted by cumulative time.
    2. A ``SUMMARY`` section repeating the "function calls ... in ... seconds"
       line from the first report (skipped if no such line is found).
    3. The ``top_n_tottime`` entries sorted by ``tottime``.

    Args:
        profiler: Profile data accepted by ``pstats.Stats`` (for example a
            ``cProfile.Profile`` that has finished collecting).
        top_n: Number of entries shown in the cumulative-time report.
        top_n_tottime: Number of entries shown in the ``tottime`` report.
    """
    banner = "=" * 80

    # --- Report 1: cumulative time -------------------------------------
    cumulative_buf = StringIO()
    stats = pstats.Stats(profiler, stream=cumulative_buf)
    print("\n" + banner)
    print("PROFILING RESULTS - Top {} functions by cumulative time".format(top_n))
    print(banner + "\n")
    stats.sort_stats('cumulative')
    stats.print_stats(top_n)
    output = cumulative_buf.getvalue()
    print(output)

    # --- Summary: the totals line emitted by print_stats ------------------
    summary_line = next(
        (line for line in output.split('\n') if 'function calls' in line),
        None,
    )
    if summary_line is not None:
        print("\n" + banner)
        print("SUMMARY")
        print(banner)
        print(summary_line)

    # --- Report 2: internal time ------------------------------------------
    # The header previously described this ordering as "time * ncalls";
    # 'tottime' is the time spent inside a function excluding its sub-calls.
    print("\n" + banner)
    print("Top {} by total time spent (tottime, excluding sub-calls)".format(top_n_tottime))
    print(banner + "\n")
    # A fresh Stats object is built so this report is written to its own buffer.
    tottime_buf = StringIO()
    stats = pstats.Stats(profiler, stream=tottime_buf)
    stats.sort_stats('tottime')
    stats.print_stats(top_n_tottime)
    print(tottime_buf.getvalue())
def main():
    """Run the benchmark: build the data, profile one render, print the report."""
    # Step 1: build the input data and describe it.
    print("Generating test DataFrame (1000 rows × 10 columns)...")
    frame = generate_test_dataframe(rows=1000, cols=10)
    print(f"DataFrame shape: {frame.shape}")
    size_kb = frame.memory_usage(deep=True).sum() / 1024
    print(f"Memory usage: {size_kb:.2f} KB\n")

    # Step 2: profile a single render and report on it.
    print("Profiling DataGrid.render()...")
    profiler, rendered = profile_datagrid_render(frame)
    rendered_length = len(str(rendered))
    print(f"\nHTML output length: {rendered_length} characters")
    print_profile_stats(profiler, top_n=30)

    # Step 3: clean up instances
    InstancesManager.reset()

    # Step 4: closing guidance for whoever reads the report.
    closing_lines = (
        "\n✅ Profiling complete!",
        "\nNext steps:",
        "1. Identify the slowest functions in the 'cumulative time' section",
        "2. Look for functions called many times (high ncalls)",
        "3. Focus optimization on high cumtime + high ncalls functions",
    )
    for text in closing_lines:
        print(text)


# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()