TradingAgents/tests/integration/dataflows/test_multi_timeframe_integr...

434 lines
18 KiB
Python

"""
Test suite for Multi-Timeframe Aggregation Integration Tests.
This module tests:
1. Integration with yfinance data format
2. Timezone handling in datetime indices
3. Volume preservation across aggregations
4. Real-world edge cases (gaps in data, single day, etc.)
5. End-to-end workflows (daily -> weekly -> monthly)
Test Coverage:
- Integration tests with yfinance-like data formats
- Timezone-aware datetime handling
- Data gaps and missing days (weekends, holidays)
- Volume accuracy across transformations
- Multiple aggregation chaining
"""
import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from unittest.mock import Mock, patch
pytestmark = pytest.mark.integration
# ============================================================================
# Fixtures
# ============================================================================
@pytest.fixture
def yfinance_format_data():
"""
Create data in yfinance format with timezone-aware DatetimeIndex.
yfinance returns data with:
- Timezone-aware datetime index (usually UTC or exchange timezone)
- Capitalized column names (Open, High, Low, Close, Volume)
- Business day frequency (no weekends)
- Potential gaps for holidays
"""
# Create 30 business days (excludes weekends)
dates = pd.bdate_range('2024-01-01', periods=30, freq='B', tz='America/New_York')
data = pd.DataFrame({
'Open': [100.0 + i * 0.5 for i in range(30)],
'High': [102.0 + i * 0.5 for i in range(30)],
'Low': [99.0 + i * 0.5 for i in range(30)],
'Close': [101.0 + i * 0.5 for i in range(30)],
'Volume': [1000000 + i * 10000 for i in range(30)],
}, index=dates)
return data
@pytest.fixture
def data_with_gaps():
"""
Create data with gaps (missing days for weekends and holidays).
Simulates real market data where weekends and holidays are missing.
"""
# Create dates but skip weekends and one holiday (Jan 15)
all_dates = pd.date_range('2024-01-01', '2024-01-31', freq='D')
# Filter to business days and remove Jan 15 (MLK Day)
business_dates = [d for d in all_dates if d.weekday() < 5 and d.day != 15]
data = pd.DataFrame({
'Open': [100.0 + i * 0.5 for i in range(len(business_dates))],
'High': [102.0 + i * 0.5 for i in range(len(business_dates))],
'Low': [99.0 + i * 0.5 for i in range(len(business_dates))],
'Close': [101.0 + i * 0.5 for i in range(len(business_dates))],
'Volume': [1000000 + i * 10000 for i in range(len(business_dates))],
}, index=pd.DatetimeIndex(business_dates))
return data
@pytest.fixture
def timezone_aware_data():
"""Create data with different timezone configurations."""
dates_utc = pd.date_range('2024-01-01', periods=30, freq='D', tz='UTC')
dates_est = pd.date_range('2024-01-01', periods=30, freq='D', tz='America/New_York')
dates_jst = pd.date_range('2024-01-01', periods=30, freq='D', tz='Asia/Tokyo')
base_data = {
'Open': [100.0 + i * 0.5 for i in range(30)],
'High': [102.0 + i * 0.5 for i in range(30)],
'Low': [99.0 + i * 0.5 for i in range(30)],
'Close': [101.0 + i * 0.5 for i in range(30)],
'Volume': [1000000 + i * 10000 for i in range(30)],
}
return {
'utc': pd.DataFrame(base_data, index=dates_utc),
'est': pd.DataFrame(base_data, index=dates_est),
'jst': pd.DataFrame(base_data, index=dates_jst),
}
# ============================================================================
# Test YFinance Integration
# ============================================================================
class TestYFinanceIntegration:
"""Test aggregation with yfinance-like data formats."""
def test_aggregation_with_yfinance_format(self, yfinance_format_data):
"""Should handle yfinance format data correctly."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly
result = aggregate_to_weekly(yfinance_format_data, anchor='SUN')
assert isinstance(result, pd.DataFrame)
assert len(result) > 0
# Should preserve timezone awareness
assert result.index.tz is not None
assert str(result.index.tz) == 'America/New_York'
# Should have correct OHLCV columns
assert all(col in result.columns for col in ['Open', 'High', 'Low', 'Close', 'Volume'])
# Verify aggregation logic
assert result.iloc[0]['Open'] == yfinance_format_data.iloc[0]['Open']
assert result.iloc[-1]['Close'] == yfinance_format_data.iloc[-1]['Close']
def test_timezone_handling(self, timezone_aware_data):
"""Should preserve timezone information across aggregations."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly, aggregate_to_monthly
for tz_name, data in timezone_aware_data.items():
# Test weekly aggregation
weekly = aggregate_to_weekly(data, anchor='SUN')
assert isinstance(weekly, pd.DataFrame)
assert weekly.index.tz is not None
# Test monthly aggregation
monthly = aggregate_to_monthly(data, period_end=True)
assert isinstance(monthly, pd.DataFrame)
assert monthly.index.tz is not None
def test_volume_preservation(self, yfinance_format_data):
"""Total volume should be preserved across aggregations."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly, aggregate_to_monthly
original_total_volume = yfinance_format_data['Volume'].sum()
# Test weekly aggregation preserves volume
weekly = aggregate_to_weekly(yfinance_format_data, anchor='SUN')
assert isinstance(weekly, pd.DataFrame)
weekly_total_volume = weekly['Volume'].sum()
assert weekly_total_volume == original_total_volume
# Test monthly aggregation preserves volume
monthly = aggregate_to_monthly(yfinance_format_data, period_end=True)
assert isinstance(monthly, pd.DataFrame)
monthly_total_volume = monthly['Volume'].sum()
assert monthly_total_volume == original_total_volume
def test_business_day_frequency_handling(self):
"""Should handle business day frequency (no weekends) correctly."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly
# Create 20 business days (4 weeks excluding weekends)
dates = pd.bdate_range('2024-01-01', periods=20, freq='B')
data = pd.DataFrame({
'Open': range(100, 120),
'High': range(102, 122),
'Low': range(99, 119),
'Close': range(101, 121),
'Volume': range(1000000, 1020000, 1000),
}, index=dates)
result = aggregate_to_weekly(data, anchor='SUN')
assert isinstance(result, pd.DataFrame)
# Should create appropriate number of weeks
assert len(result) >= 4
# Verify volume preservation
assert result['Volume'].sum() == data['Volume'].sum()
# ============================================================================
# Test Edge Cases
# ============================================================================
class TestEdgeCases:
"""Test edge cases and real-world scenarios."""
def test_single_day_data(self):
"""Should handle single day of data correctly."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly, aggregate_to_monthly
dates = pd.date_range('2024-01-15', periods=1, freq='D')
data = pd.DataFrame({
'Open': [100.0],
'High': [102.0],
'Low': [99.0],
'Close': [101.0],
'Volume': [1000000],
}, index=dates)
# Weekly aggregation
weekly = aggregate_to_weekly(data, anchor='SUN')
assert isinstance(weekly, pd.DataFrame)
assert len(weekly) == 1
assert weekly.iloc[0]['Open'] == 100.0
assert weekly.iloc[0]['High'] == 102.0
assert weekly.iloc[0]['Low'] == 99.0
assert weekly.iloc[0]['Close'] == 101.0
assert weekly.iloc[0]['Volume'] == 1000000
# Monthly aggregation
monthly = aggregate_to_monthly(data, period_end=True)
assert isinstance(monthly, pd.DataFrame)
assert len(monthly) == 1
assert monthly.iloc[0]['Open'] == 100.0
assert monthly.iloc[0]['High'] == 102.0
assert monthly.iloc[0]['Low'] == 99.0
assert monthly.iloc[0]['Close'] == 101.0
assert monthly.iloc[0]['Volume'] == 1000000
def test_data_with_gaps(self, data_with_gaps):
"""Should handle data with gaps (weekends, holidays) correctly."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly, aggregate_to_monthly
# Weekly aggregation should work with gaps
weekly = aggregate_to_weekly(data_with_gaps, anchor='SUN')
assert isinstance(weekly, pd.DataFrame)
assert len(weekly) > 0
# Volume should be preserved despite gaps
assert weekly['Volume'].sum() == data_with_gaps['Volume'].sum()
# Monthly aggregation should work with gaps
monthly = aggregate_to_monthly(data_with_gaps, period_end=True)
assert isinstance(monthly, pd.DataFrame)
assert len(monthly) == 1 # All data in January
# Verify aggregation accuracy
assert monthly.iloc[0]['Open'] == data_with_gaps.iloc[0]['Open']
assert monthly.iloc[0]['Close'] == data_with_gaps.iloc[-1]['Close']
assert monthly.iloc[0]['High'] == data_with_gaps['High'].max()
assert monthly.iloc[0]['Low'] == data_with_gaps['Low'].min()
assert monthly.iloc[0]['Volume'] == data_with_gaps['Volume'].sum()
def test_multiple_months_with_gaps(self):
"""Should handle multiple months with gaps correctly."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_monthly
# Create 3 months of business days only
dates = pd.bdate_range('2024-01-01', '2024-03-31', freq='B')
data = pd.DataFrame({
'Open': [100.0 + i * 0.1 for i in range(len(dates))],
'High': [102.0 + i * 0.1 for i in range(len(dates))],
'Low': [99.0 + i * 0.1 for i in range(len(dates))],
'Close': [101.0 + i * 0.1 for i in range(len(dates))],
'Volume': [1000000 + i * 1000 for i in range(len(dates))],
}, index=dates)
result = aggregate_to_monthly(data, period_end=True)
assert isinstance(result, pd.DataFrame)
assert len(result) == 3 # Jan, Feb, Mar
# Each month should have correct aggregations
for i in range(len(result)):
month_data = data[data.index.month == (i + 1)]
assert result.iloc[i]['Open'] == month_data.iloc[0]['Open']
assert result.iloc[i]['Close'] == month_data.iloc[-1]['Close']
assert result.iloc[i]['High'] == month_data['High'].max()
assert result.iloc[i]['Low'] == month_data['Low'].min()
assert result.iloc[i]['Volume'] == month_data['Volume'].sum()
def test_intraday_to_daily_aggregation(self):
"""Should handle intraday data aggregation to daily."""
from tradingagents.dataflows.multi_timeframe import _resample_ohlcv
# Create 1 day of hourly data (9:30 AM to 4:00 PM = 7 hours)
dates = pd.date_range('2024-01-15 09:30', periods=7, freq='h')
data = pd.DataFrame({
'Open': [100.0, 101.0, 100.5, 102.0, 101.5, 103.0, 102.5],
'High': [101.5, 102.0, 101.5, 103.0, 102.5, 104.0, 103.5],
'Low': [99.5, 100.5, 100.0, 101.5, 101.0, 102.5, 102.0],
'Close': [101.0, 100.5, 102.0, 101.5, 103.0, 102.5, 103.5],
'Volume': [100000, 150000, 120000, 180000, 140000, 160000, 110000],
}, index=dates)
# Aggregate to daily using 'D' frequency
result = _resample_ohlcv(data, freq='D', label='right', closed='right')
assert isinstance(result, pd.DataFrame)
assert len(result) == 1
# Verify daily aggregation
assert result.iloc[0]['Open'] == 100.0 # First hour's open
assert result.iloc[0]['High'] == 104.0 # Max of all hours
assert result.iloc[0]['Low'] == 99.5 # Min of all hours
assert result.iloc[0]['Close'] == 103.5 # Last hour's close
assert result.iloc[0]['Volume'] == 960000 # Sum of all hours
def test_chained_aggregations(self):
"""Should support chaining aggregations (daily -> weekly -> monthly)."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly, aggregate_to_monthly
# Create 60 days of daily data
dates = pd.date_range('2024-01-01', periods=60, freq='D')
data = pd.DataFrame({
'Open': [100.0 + i * 0.1 for i in range(60)],
'High': [102.0 + i * 0.1 for i in range(60)],
'Low': [99.0 + i * 0.1 for i in range(60)],
'Close': [101.0 + i * 0.1 for i in range(60)],
'Volume': [1000000 + i * 1000 for i in range(60)],
}, index=dates)
original_volume = data['Volume'].sum()
# Daily -> Weekly
weekly = aggregate_to_weekly(data, anchor='SUN')
assert isinstance(weekly, pd.DataFrame)
assert weekly['Volume'].sum() == original_volume
# Weekly -> Monthly (aggregate weekly data to monthly)
monthly = aggregate_to_monthly(weekly, period_end=True)
assert isinstance(monthly, pd.DataFrame)
assert monthly['Volume'].sum() == original_volume
# Verify monthly matches direct daily -> monthly
monthly_direct = aggregate_to_monthly(data, period_end=True)
assert isinstance(monthly_direct, pd.DataFrame)
# Both paths should preserve total volume
assert monthly['Volume'].sum() == monthly_direct['Volume'].sum()
def test_empty_result_handling(self):
"""Should handle cases where resampling produces empty results."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly
# Create data with only NaN values
dates = pd.date_range('2024-01-01', periods=7, freq='D')
data = pd.DataFrame({
'Open': [np.nan] * 7,
'High': [np.nan] * 7,
'Low': [np.nan] * 7,
'Close': [np.nan] * 7,
'Volume': [0] * 7,
}, index=dates)
result = aggregate_to_weekly(data, anchor='SUN')
# Should still return a DataFrame (even if values are NaN)
assert isinstance(result, pd.DataFrame)
def test_mixed_frequency_data(self):
"""Should handle data with mixed frequencies (some days missing)."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly
# Create irregular dates (not every day)
dates = pd.to_datetime([
'2024-01-01', '2024-01-02', '2024-01-04', # Missing Jan 3
'2024-01-08', '2024-01-09', # Missing Jan 5-7
'2024-01-15', '2024-01-16' # Missing Jan 10-14
])
data = pd.DataFrame({
'Open': [100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.0],
'High': [102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0],
'Low': [99.0, 100.0, 101.0, 102.0, 103.0, 104.0, 105.0],
'Close': [101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0],
'Volume': [1000000, 1100000, 1200000, 1300000, 1400000, 1500000, 1600000],
}, index=dates)
result = aggregate_to_weekly(data, anchor='SUN')
assert isinstance(result, pd.DataFrame)
assert len(result) > 0
# Volume should be preserved
assert result['Volume'].sum() == data['Volume'].sum()
def test_leap_year_february(self):
"""Should handle February in leap year correctly."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_monthly
# 2024 is a leap year (29 days in Feb)
dates = pd.date_range('2024-02-01', '2024-02-29', freq='D')
data = pd.DataFrame({
'Open': [100.0 + i * 0.1 for i in range(len(dates))],
'High': [102.0 + i * 0.1 for i in range(len(dates))],
'Low': [99.0 + i * 0.1 for i in range(len(dates))],
'Close': [101.0 + i * 0.1 for i in range(len(dates))],
'Volume': [1000000 + i * 1000 for i in range(len(dates))],
}, index=dates)
result = aggregate_to_monthly(data, period_end=True)
assert isinstance(result, pd.DataFrame)
assert len(result) == 1
assert result.index[0].day == 29 # Should end on Feb 29
# Verify aggregation
assert result.iloc[0]['Open'] == data.iloc[0]['Open']
assert result.iloc[0]['Close'] == data.iloc[-1]['Close']
assert result.iloc[0]['Volume'] == data['Volume'].sum()
def test_year_end_rollover(self):
"""Should handle year-end rollover correctly."""
from tradingagents.dataflows.multi_timeframe import aggregate_to_weekly, aggregate_to_monthly
# Create data spanning year boundary
dates = pd.date_range('2023-12-25', '2024-01-05', freq='D')
data = pd.DataFrame({
'Open': [100.0 + i * 0.1 for i in range(len(dates))],
'High': [102.0 + i * 0.1 for i in range(len(dates))],
'Low': [99.0 + i * 0.1 for i in range(len(dates))],
'Close': [101.0 + i * 0.1 for i in range(len(dates))],
'Volume': [1000000 + i * 1000 for i in range(len(dates))],
}, index=dates)
# Weekly aggregation
weekly = aggregate_to_weekly(data, anchor='SUN')
assert isinstance(weekly, pd.DataFrame)
assert weekly['Volume'].sum() == data['Volume'].sum()
# Monthly aggregation
monthly = aggregate_to_monthly(data, period_end=True)
assert isinstance(monthly, pd.DataFrame)
assert len(monthly) == 2 # December and January
assert monthly['Volume'].sum() == data['Volume'].sum()