41 lines
1.5 KiB
Python
41 lines
1.5 KiB
Python
import pandas as pd
|
|
import yfinance as yf
|
|
import time
|
|
import os
|
|
|
|
def run_benchmark():
    """Download the sample price history and print its column labels.

    Fetches AAPL data from yfinance (start="2020-01-01", end="2023-01-01"),
    turns the index into a regular column via ``reset_index()``, and prints
    the resulting columns. Nothing is timed here yet; the function only
    prepares and inspects the data a later measurement would use.
    """
    ticker = "AAPL"
    range_start = "2020-01-01"
    range_end = "2023-01-01"

    # Fetch the data before anything else so that whatever gets measured
    # afterwards is the step of interest, not the download.
    frame = yf.download(
        ticker,
        start=range_start,
        end=range_end,
        multi_level_index=False,
        progress=False,
        auto_adjust=True,
    )
    frame = frame.reset_index()

    print("Columns:", frame.columns)

    # Context for the investigation this script supports.
    # Reported issue: "Missing optimization on DataFrame creation from
    # iteration". The code path it points at is the cache-or-download logic:
    #   * if os.path.exists(data_file):
    #         data = pd.read_csv(data_file, on_bad_lines="skip")
    #   * else:
    #         data = yf.download(...)  # same arguments as the call above
    #         data = data.reset_index()
    #         data.to_csv(data_file, index=False)
    #   * followed by: data = _clean_dataframe(data)
    # pd.read_csv() is already fairly fast, so it is unclear where the
    # "creation from iteration" cost would come from.
    # Idea not tried yet: request the parser engine explicitly, i.e.
    #     pd.read_csv(data_file, engine="c", on_bad_lines="skip")
    # NOTE(review): check whether "c" is already the default engine for this
    # call before spending time on it.
|