import pandas as pd import yfinance as yf import time import os def run_benchmark(): symbol = "AAPL" start_date_str = "2020-01-01" end_date_str = "2023-01-01" # Let's download first to make sure we measure what we need to measure data_orig = yf.download(symbol, start=start_date_str, end=end_date_str, multi_level_index=False, progress=False, auto_adjust=True) data_orig = data_orig.reset_index() print("Columns:", data_orig.columns) # Baseline for clean_dataframe optimization? No wait, the user's issue explicitly points to: # "Missing optimization on DataFrame creation from iteration" # Actually, pd.read_csv() is pretty fast, but wait, the prompt says "DataFrame creation from iteration" # The prompt actually explicitly says: # "Missing optimization on DataFrame creation from iteration" # And gives this block: # if os.path.exists(data_file): # data = pd.read_csv(data_file, on_bad_lines="skip") # else: # data = yf.download( # symbol, # start=start_date_str, # end=end_date_str, # multi_level_index=False, # progress=False, # auto_adjust=True, # ) # data = data.reset_index() # data.to_csv(data_file, index=False) # data = _clean_dataframe(data) # Could there be a better engine? # pd.read_csv(data_file, engine="c", on_bad_lines="skip") pass