diff --git a/tradingagents/dataflows/alpha_vantage_fundamentals.py b/tradingagents/dataflows/alpha_vantage_fundamentals.py index 8b92faa6..3f401337 100644 --- a/tradingagents/dataflows/alpha_vantage_fundamentals.py +++ b/tradingagents/dataflows/alpha_vantage_fundamentals.py @@ -26,7 +26,7 @@ def get_balance_sheet(ticker: str, freq: str = "quarterly", curr_date: str = Non Args: ticker (str): Ticker symbol of the company freq (str): Reporting frequency: annual/quarterly (default quarterly) - not used for Alpha Vantage - curr_date (str): Current date you are trading at, yyyy-mm-dd (not used for Alpha Vantage) + curr_date (str): Current date you are trading at, yyyy-mm-dd Returns: str: Balance sheet data with normalized fields @@ -35,7 +35,14 @@ def get_balance_sheet(ticker: str, freq: str = "quarterly", curr_date: str = Non "symbol": ticker, } - return _make_api_request("BALANCE_SHEET", params) + result = _make_api_request("BALANCE_SHEET", params) + # Filter out reports whose fiscalDateEnding is after curr_date to prevent look-ahead bias. + if curr_date and isinstance(result, dict): + for key in ("annualReports", "quarterlyReports"): + if key in result: + result[key] = [r for r in result[key] + if r.get("fiscalDateEnding", "") <= curr_date] + return result def get_cashflow(ticker: str, freq: str = "quarterly", curr_date: str = None) -> str: @@ -45,7 +52,7 @@ def get_cashflow(ticker: str, freq: str = "quarterly", curr_date: str = None) -> Args: ticker (str): Ticker symbol of the company freq (str): Reporting frequency: annual/quarterly (default quarterly) - not used for Alpha Vantage - curr_date (str): Current date you are trading at, yyyy-mm-dd (not used for Alpha Vantage) + curr_date (str): Current date you are trading at, yyyy-mm-dd Returns: str: Cash flow statement data with normalized fields @@ -54,7 +61,14 @@ def get_cashflow(ticker: str, freq: str = "quarterly", curr_date: str = None) -> "symbol": ticker, } - return _make_api_request("CASH_FLOW", params) + result = _make_api_request("CASH_FLOW", params) + # Filter out reports whose fiscalDateEnding is after curr_date to prevent look-ahead bias. + if curr_date and isinstance(result, dict): + for key in ("annualReports", "quarterlyReports"): + if key in result: + result[key] = [r for r in result[key] + if r.get("fiscalDateEnding", "") <= curr_date] + return result def get_income_statement(ticker: str, freq: str = "quarterly", curr_date: str = None) -> str: @@ -64,7 +78,7 @@ def get_income_statement(ticker: str, freq: str = "quarterly", curr_date: str = Args: ticker (str): Ticker symbol of the company freq (str): Reporting frequency: annual/quarterly (default quarterly) - not used for Alpha Vantage - curr_date (str): Current date you are trading at, yyyy-mm-dd (not used for Alpha Vantage) + curr_date (str): Current date you are trading at, yyyy-mm-dd Returns: str: Income statement data with normalized fields @@ -73,5 +87,12 @@ def get_income_statement(ticker: str, freq: str = "quarterly", curr_date: str = "symbol": ticker, } - return _make_api_request("INCOME_STATEMENT", params) + result = _make_api_request("INCOME_STATEMENT", params) + # Filter out reports whose fiscalDateEnding is after curr_date to prevent look-ahead bias. + if curr_date and isinstance(result, dict): + for key in ("annualReports", "quarterlyReports"): + if key in result: + result[key] = [r for r in result[key] + if r.get("fiscalDateEnding", "") <= curr_date] + return result diff --git a/tradingagents/dataflows/stockstats_utils.py b/tradingagents/dataflows/stockstats_utils.py index 47d5460a..3846c65d 100644 --- a/tradingagents/dataflows/stockstats_utils.py +++ b/tradingagents/dataflows/stockstats_utils.py @@ -57,11 +57,12 @@ class StockstatsUtils: ): config = get_config() - today_date = pd.Timestamp.today() curr_date_dt = pd.to_datetime(curr_date) - end_date = today_date - start_date = today_date - pd.DateOffset(years=15) + # Cap end_date to curr_date to prevent look-ahead bias in backtesting. + # Using curr_date + 1 day so yfinance includes the simulation date itself. + end_date = curr_date_dt + pd.DateOffset(days=1) + start_date = curr_date_dt - pd.DateOffset(years=15) start_date_str = start_date.strftime("%Y-%m-%d") end_date_str = end_date.strftime("%Y-%m-%d") diff --git a/tradingagents/dataflows/y_finance.py b/tradingagents/dataflows/y_finance.py index 3682a01d..8f6d6680 100644 --- a/tradingagents/dataflows/y_finance.py +++ b/tradingagents/dataflows/y_finance.py @@ -216,11 +216,11 @@ def _get_stock_stats_bulk( raise Exception("Stockstats fail: Yahoo Finance data not fetched yet!") else: # Online data fetching with caching - today_date = pd.Timestamp.today() curr_date_dt = pd.to_datetime(curr_date) - end_date = today_date - start_date = today_date - pd.DateOffset(years=15) + # Cap end_date to curr_date to prevent look-ahead bias in backtesting. + end_date = curr_date_dt + pd.DateOffset(days=1) + start_date = curr_date_dt - pd.DateOffset(years=15) start_date_str = start_date.strftime("%Y-%m-%d") end_date_str = end_date.strftime("%Y-%m-%d") @@ -353,7 +353,7 @@ def get_fundamentals( def get_balance_sheet( ticker: Annotated[str, "ticker symbol of the company"], freq: Annotated[str, "frequency of data: 'annual' or 'quarterly'"] = "quarterly", - curr_date: Annotated[str, "current date (not used for yfinance)"] = None + curr_date: Annotated[str, "current date in YYYY-MM-DD format for look-ahead bias prevention"] = None ): """Get balance sheet data from yfinance.""" try: @@ -363,9 +363,16 @@ def get_balance_sheet( data = yf_retry(lambda: ticker_obj.quarterly_balance_sheet) else: data = yf_retry(lambda: ticker_obj.balance_sheet) - + + # Filter out fiscal periods after curr_date to prevent look-ahead bias. + if curr_date and not data.empty: + cutoff = pd.Timestamp(curr_date) + data = data.loc[:, [c for c in data.columns if pd.Timestamp(c) <= cutoff]] + if data.empty: - return f"No balance sheet data found for symbol '{ticker}'" + return f"No balance sheet data found for symbol '{ticker}'" + ( + f" on or before {curr_date}" if curr_date else "" + ) # Convert to CSV string for consistency with other functions csv_string = data.to_csv() @@ -383,7 +390,7 @@ def get_balance_sheet( def get_cashflow( ticker: Annotated[str, "ticker symbol of the company"], freq: Annotated[str, "frequency of data: 'annual' or 'quarterly'"] = "quarterly", - curr_date: Annotated[str, "current date (not used for yfinance)"] = None + curr_date: Annotated[str, "current date in YYYY-MM-DD format for look-ahead bias prevention"] = None ): """Get cash flow data from yfinance.""" try: @@ -393,9 +400,16 @@ def get_cashflow( data = yf_retry(lambda: ticker_obj.quarterly_cashflow) else: data = yf_retry(lambda: ticker_obj.cashflow) - + + # Filter out fiscal periods after curr_date to prevent look-ahead bias. + if curr_date and not data.empty: + cutoff = pd.Timestamp(curr_date) + data = data.loc[:, [c for c in data.columns if pd.Timestamp(c) <= cutoff]] + if data.empty: - return f"No cash flow data found for symbol '{ticker}'" + return f"No cash flow data found for symbol '{ticker}'" + ( + f" on or before {curr_date}" if curr_date else "" + ) # Convert to CSV string for consistency with other functions csv_string = data.to_csv() @@ -413,7 +427,7 @@ def get_cashflow( def get_income_statement( ticker: Annotated[str, "ticker symbol of the company"], freq: Annotated[str, "frequency of data: 'annual' or 'quarterly'"] = "quarterly", - curr_date: Annotated[str, "current date (not used for yfinance)"] = None + curr_date: Annotated[str, "current date in YYYY-MM-DD format for look-ahead bias prevention"] = None ): """Get income statement data from yfinance.""" try: @@ -423,9 +437,16 @@ def get_income_statement( data = yf_retry(lambda: ticker_obj.quarterly_income_stmt) else: data = yf_retry(lambda: ticker_obj.income_stmt) - + + # Filter out fiscal periods after curr_date to prevent look-ahead bias. + if curr_date and not data.empty: + cutoff = pd.Timestamp(curr_date) + data = data.loc[:, [c for c in data.columns if pd.Timestamp(c) <= cutoff]] + if data.empty: - return f"No income statement data found for symbol '{ticker}'" + return f"No income statement data found for symbol '{ticker}'" + ( + f" on or before {curr_date}" if curr_date else "" + ) # Convert to CSV string for consistency with other functions csv_string = data.to_csv() diff --git a/tradingagents/dataflows/yfinance_news.py b/tradingagents/dataflows/yfinance_news.py index 20e9120d..d63d4711 100644 --- a/tradingagents/dataflows/yfinance_news.py +++ b/tradingagents/dataflows/yfinance_news.py @@ -167,6 +167,11 @@ def get_global_news_yfinance( # Handle both flat and nested structures if "content" in article: data = _extract_article_data(article) + # Skip articles published after curr_date (look-ahead guard) + if data["pub_date"]: + pub_naive = data["pub_date"].replace(tzinfo=None) + if pub_naive > curr_dt + relativedelta(days=1): + continue title = data["title"] publisher = data["publisher"] link = data["link"]