34 lines
1.0 KiB
Python
34 lines
1.0 KiB
Python
import time
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
# Benchmark two ways of lowercasing pd.DataFrame column names: iterating with a
# list comprehension vs. the vectorized str.lower() method.
#
# Create DataFrames with many columns so the difference shows up clearly.
# A typical stock DataFrame has only a small number of columns (e.g. 6-7),
# so benchmark both small DataFrames and a very large one.
|
|
|
|
def benchmark(num_cols, iterations):
    """Time two ways of lowercasing the column labels of a DataFrame.

    An empty DataFrame with ``num_cols`` columns named ``Col_0``, ``Col_1``,
    ... is created, and its column labels are lowercased ``iterations`` times
    with each approach:

    1. a list comprehension calling ``str(c).lower()`` on every label;
    2. the pandas chain ``df.columns.astype(str).str.lower()``.

    A short report is printed to stdout.

    Args:
        num_cols: Number of columns in the DataFrame under test.
        iterations: How many times each approach is repeated.

    Returns:
        A ``(list_comprehension_seconds, pandas_seconds)`` tuple with the
        total elapsed time of each loop, so callers can compare the timings
        programmatically instead of parsing the printed report.
    """
    cols = [f"Col_{i}" for i in range(num_cols)]
    df = pd.DataFrame(columns=cols)

    # time.perf_counter() is a monotonic, high-resolution clock meant for
    # measuring intervals; time.time() can jump when the system clock is
    # adjusted and may have coarse resolution, which distorts short timings.
    start = time.perf_counter()
    for _ in range(iterations):
        _ = [str(c).lower() for c in df.columns]
    t1 = time.perf_counter() - start

    start = time.perf_counter()
    for _ in range(iterations):
        _ = df.columns.astype(str).str.lower()
    t2 = time.perf_counter() - start

    print(f"Num cols: {num_cols}, Iterations: {iterations}")
    print(f"List comprehension: {t1:.6f} s")
    print(f"Pandas str.lower(): {t2:.6f} s")
    print("-" * 30)

    return t1, t2
|
|
|
|
# Run the benchmark for a small, a medium and a very large column count,
# using the same number of iterations each time.
for column_count in (10, 100, 1000):
    benchmark(column_count, 10000)
|