"""Benchmark two ways of lower-casing pandas DataFrame column labels.

Compares a plain list comprehension (``str(c).lower()`` per label) against
the vectorized ``Index.astype(str).str.lower()`` accessor.

A typical stock DataFrame has only a handful of columns (e.g. 6-7), so the
benchmark is run for a small frame as well as for much wider ones to make
any difference visible.
"""

import time

import numpy as np
import pandas as pd


def benchmark(num_cols, iterations):
    """Time both lower-casing strategies and print the results.

    Args:
        num_cols: Number of columns in the (row-less) test DataFrame.
        iterations: How many times each strategy is repeated.

    Returns:
        None. The elapsed time of each strategy, in seconds, is printed
        to stdout.
    """
    cols = [f"Col_{i}" for i in range(num_cols)]
    df = pd.DataFrame(columns=cols)

    # perf_counter() is monotonic and uses the highest-resolution clock
    # available, unlike time.time(), which can jump when the system clock
    # is adjusted and may be too coarse for short intervals.
    start = time.perf_counter()
    for _ in range(iterations):
        _lowered = [str(c).lower() for c in df.columns]
    t1 = time.perf_counter() - start

    start = time.perf_counter()
    for _ in range(iterations):
        _lowered = df.columns.astype(str).str.lower()
    t2 = time.perf_counter() - start

    print(f"Num cols: {num_cols}, Iterations: {iterations}")
    print(f"List comprehension: {t1:.6f} s")
    print(f"Pandas str.lower(): {t2:.6f} s")
    print("-" * 30)


if __name__ == "__main__":
    # Guarded so that importing this module does not trigger the (slow)
    # benchmark runs; executing the file as a script behaves as before.
    benchmark(10, 10000)
    benchmark(100, 10000)
    benchmark(1000, 10000)