162 lines
6.4 KiB
Python
162 lines
6.4 KiB
Python
"""
|
|
Tests to verify no future data leakage in backtesting.
|
|
|
|
These tests ensure that when running backtests for a historical date,
|
|
the system only uses data that would have been available at that time.
|
|
"""
|
|
import pytest
|
|
from datetime import datetime, timedelta
|
|
import pandas as pd
|
|
|
|
|
|
class TestStockstatsDataLeakage:
|
|
"""Test that stockstats_utils.py doesn't leak future data."""
|
|
|
|
def test_stockstats_uses_curr_date_not_today(self):
|
|
"""Verify stockstats filters data up to curr_date, not today."""
|
|
from tradingagents.dataflows.stockstats_utils import StockstatsUtils
|
|
|
|
# Test with a historical date
|
|
historical_date = "2024-06-15"
|
|
|
|
# This should only use data up to 2024-06-15
|
|
# If the function works correctly, we shouldn't get data beyond this date
|
|
try:
|
|
result = StockstatsUtils.get_stock_stats("AAPL", "close", historical_date)
|
|
# Result should be a value, not an error
|
|
assert result is not None
|
|
except Exception as e:
|
|
# If local data not available, that's expected
|
|
if "Yahoo Finance data not fetched yet" not in str(e):
|
|
raise
|
|
|
|
def test_end_date_equals_curr_date(self):
|
|
"""Verify that yfinance download uses curr_date as end_date."""
|
|
# This is a structural check - the code should use curr_date_dt as end_date_dt
|
|
from tradingagents.dataflows.stockstats_utils import StockstatsUtils
|
|
import inspect
|
|
|
|
source = inspect.getsource(StockstatsUtils.get_stock_stats)
|
|
|
|
# Check that the code uses curr_date for end_date calculation
|
|
assert "end_date_dt = curr_date_dt" in source or "end=end_date" in source, \
|
|
"stockstats should use curr_date as end_date to prevent future data leakage"
|
|
|
|
|
|
class TestAlphaVantageDataLeakage:
|
|
"""Test that alpha_vantage_stock.py doesn't leak future data."""
|
|
|
|
def test_outputsize_uses_end_date_not_now(self):
|
|
"""Verify outputsize calculation uses end_date, not datetime.now()."""
|
|
from tradingagents.dataflows.alpha_vantage_stock import get_stock
|
|
import inspect
|
|
|
|
source = inspect.getsource(get_stock)
|
|
|
|
# Should NOT contain datetime.now() for outputsize calculation
|
|
assert "datetime.now()" not in source, \
|
|
"alpha_vantage_stock should not use datetime.now() - use end_date instead"
|
|
|
|
# Should use end_date for the calculation
|
|
assert "end_dt" in source or "end_date" in source, \
|
|
"Should use end_date for outputsize calculation"
|
|
|
|
|
|
class TestFundamentalsDataLeakage:
|
|
"""Test that fundamentals data respects publication delays."""
|
|
|
|
def test_publication_delay_is_conservative(self):
|
|
"""Verify publication delay is at least 60 days (conservative estimate)."""
|
|
from tradingagents.dataflows.y_finance import _filter_fundamentals_by_date
|
|
import inspect
|
|
|
|
source = inspect.getsource(_filter_fundamentals_by_date)
|
|
|
|
# Check that publication delay is at least 60 days
|
|
assert "publication_delay_days = 60" in source or "publication_delay_days = 90" in source, \
|
|
"Publication delay should be at least 60 days for conservative backtesting"
|
|
|
|
def test_fundamentals_filtered_by_publish_date(self):
|
|
"""Verify fundamentals are filtered by estimated publish date, not report date."""
|
|
from tradingagents.dataflows.y_finance import _filter_fundamentals_by_date
|
|
import pandas as pd
|
|
|
|
# Create mock fundamentals data with future dates
|
|
curr_date = "2024-06-15"
|
|
# Report from Q1 2024 (dated 2024-03-31) should be visible
|
|
# Report from Q2 2024 (dated 2024-06-30) should NOT be visible
|
|
|
|
mock_data = pd.DataFrame({
|
|
"2024-03-31": [100, 200],
|
|
"2024-06-30": [150, 250], # This shouldn't be visible in June
|
|
"2024-09-30": [175, 275], # This definitely shouldn't be visible
|
|
}, index=["Revenue", "Profit"])
|
|
|
|
filtered = _filter_fundamentals_by_date(mock_data, curr_date)
|
|
|
|
# With 60-day delay, only 2024-03-31 should be visible on 2024-06-15
|
|
# because 2024-03-31 + 60 days = 2024-05-30, which is before 2024-06-15
|
|
assert "2024-03-31" in filtered.columns, \
|
|
"Q1 2024 report (dated 2024-03-31) should be visible on 2024-06-15"
|
|
|
|
assert "2024-06-30" not in filtered.columns, \
|
|
"Q2 2024 report (dated 2024-06-30) should NOT be visible on 2024-06-15"
|
|
|
|
assert "2024-09-30" not in filtered.columns, \
|
|
"Q3 2024 report should definitely NOT be visible on 2024-06-15"
|
|
|
|
|
|
class TestLocalDataLeakage:
|
|
"""Test that local data files are properly filtered."""
|
|
|
|
def test_local_data_has_date_filtering(self):
|
|
"""Verify local data reading includes date filtering."""
|
|
from tradingagents.dataflows.local import get_stock_data
|
|
import inspect
|
|
|
|
source = inspect.getsource(get_stock_data)
|
|
|
|
# Should filter data by date range
|
|
assert "start_date" in source and "end_date" in source, \
|
|
"Local data should be filtered by date range"
|
|
|
|
|
|
class TestBacktestIntegrity:
|
|
"""Test overall backtest integrity."""
|
|
|
|
def test_no_future_imports_in_dataflows(self):
|
|
"""Verify dataflows don't use any patterns that could leak future data."""
|
|
import os
|
|
import re
|
|
|
|
dataflows_dir = "tradingagents/dataflows"
|
|
|
|
dangerous_patterns = [
|
|
# Using today's date when historical date should be used
|
|
r"pd\.Timestamp\.today\(\)",
|
|
r"datetime\.today\(\)",
|
|
r"date\.today\(\)",
|
|
# Unfiltered data access (should always filter by date)
|
|
r"\.history\([^)]*\)(?![^)]*end=)", # history() without end parameter
|
|
]
|
|
|
|
issues = []
|
|
|
|
for root, dirs, files in os.walk(dataflows_dir):
|
|
for file in files:
|
|
if file.endswith(".py"):
|
|
filepath = os.path.join(root, file)
|
|
with open(filepath, "r") as f:
|
|
content = f.read()
|
|
for pattern in dangerous_patterns:
|
|
matches = re.findall(pattern, content)
|
|
if matches:
|
|
issues.append(f"{filepath}: Found dangerous pattern {pattern}")
|
|
|
|
assert len(issues) == 0, \
|
|
f"Found potential data leakage patterns:\n" + "\n".join(issues)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
pytest.main([__file__, "-v"])
|