# TradingAgents/tests/torture_test_2022.py
#
# 375 lines
# 14 KiB
# Python

"""
2022 Torture Test - Bear Market Backtest
Tests system performance during the 2022 tech crash:
- NVDA: -50%+
- AMZN: -50%
- AAPL: -27%
Pass Criteria:
- Max Drawdown < 25% (better than Nasdaq-100's -33%)
- Fact checker must reject bullish hallucinations
- Regime detector must identify BEAR/VOLATILE periods
"""
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
from typing import Dict, List, Tuple
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from tradingagents.workflows.integrated_workflow import IntegratedTradingWorkflow
from tradingagents.schemas.agent_schemas import SignalType
class TortureTestBacktest:
    """
    2022 Bear Market Backtest.

    Replays daily bars through ``IntegratedTradingWorkflow`` with canned
    (mock) agent responses and records trades, rejections and an equity curve.

    Tests if system can survive the tech crash with:
    - Regime detection (should detect BEAR/VOLATILE)
    - Fact checker (should reject bullish hallucinations)
    - Risk gate (should enforce circuit breakers)
    """

    def __init__(self, starting_capital: float = 100000):
        """Initialize backtest state and the workflow under test.

        Args:
            starting_capital: Initial cash balance; also handed to the
                workflow config as ``portfolio_value``.
        """
        self.starting_capital = starting_capital
        # Cash balance: debited on BUY, credited on SELL.
        self.capital = starting_capital
        # Signed share count per ticker (negative means net short).
        self.positions = {}
        # Last execution price per ticker; valuation fallback when the price
        # data has no bar for a held ticker.
        self._last_price = {}
        self.equity_curve = []
        self.trades = []
        self.rejections = {
            "fact_check": [],
            "risk_gate": [],
            "json_compliance": []
        }
        self.regime_log = []
        # Configure workflow
        config = {
            "anonymizer_seed": "torture_test_2022",
            "use_nli_model": False,  # Use fallback for speed
            "max_json_retries": 2,
            "fact_check_latency_budget": 2.0,
            "portfolio_value": starting_capital,
            "risk_config": {
                "max_position_risk": 0.02,  # 2% max risk per trade
                "max_portfolio_heat": 0.10,  # 10% max total portfolio risk
                "circuit_breaker": 0.15  # Stop trading if 15% drawdown
            }
        }
        self.workflow = IntegratedTradingWorkflow(config)

    @staticmethod
    def _column(frame: pd.DataFrame, name: str) -> pd.Series:
        """Return column ``name`` of ``frame`` as a one-dimensional Series.

        Selecting a top-level label from a frame whose columns are a
        MultiIndex (e.g. ``(field, ticker)`` levels) yields a DataFrame; in
        that case the first sub-column is used so callers always get scalars
        out of ``.iloc[-1]``.
        """
        selected = frame[name]
        if isinstance(selected, pd.DataFrame):
            selected = selected.iloc[:, 0]
        return selected

    @staticmethod
    def _signal_name(action) -> str:
        """Return an upper-cased label for a signal (enum ``name`` if present)."""
        # NOTE(review): assumes SignalType is an Enum whose member names are
        # BUY / SELL / HOLD (the file accesses SignalType.BUY etc.) - confirm.
        label = getattr(action, "name", None)
        if label is None:
            label = getattr(action, "value", action)
        return str(label).upper()

    def download_data(self, tickers: List[str], start_date: str, end_date: str) -> Dict[str, pd.DataFrame]:
        """Download historical data for tickers.

        Args:
            tickers: Symbols to fetch via ``yf.download``.
            start_date: Start date YYYY-MM-DD.
            end_date: End date YYYY-MM-DD.

        Returns:
            Mapping of ticker to its price frame. Tickers for which the
            download returned no rows are omitted.
        """
        print(f"📥 Downloading data for {tickers} from {start_date} to {end_date}...")
        data = {}
        for ticker in tickers:
            df = yf.download(ticker, start=start_date, end=end_date, progress=False)
            if len(df) > 0:
                data[ticker] = df
                print(f"{ticker}: {len(df)} days")
            else:
                print(f"{ticker}: No data")
        return data

    def run_backtest(
        self,
        tickers: List[str],
        start_date: str,
        end_date: str
    ) -> Dict:
        """
        Run 2022 torture test backtest.

        Args:
            tickers: List of tickers to trade
            start_date: Start date YYYY-MM-DD
            end_date: End date YYYY-MM-DD

        Returns:
            Results dict with metrics (see ``_calculate_metrics``)

        Raises:
            ValueError: If nothing could be downloaded, or the downloaded
                tickers share no common trading date.
        """
        # Download data
        data = self.download_data(tickers, start_date, end_date)
        if not data:
            raise ValueError("No data downloaded")

        # download_data drops tickers without rows, so only the tickers that
        # actually have data may be indexed below.
        available = [ticker for ticker in tickers if ticker in data]

        # Get trading dates (intersection of all available tickers)
        common_dates = set(data[available[0]].index)
        for ticker in available[1:]:
            common_dates &= set(data[ticker].index)
        trading_dates = sorted(common_dates)
        if not trading_dates:
            raise ValueError("No common trading dates across downloaded tickers")

        print(f"\n📅 Trading period: {trading_dates[0].date()} to {trading_dates[-1].date()}")
        print(f" Total trading days: {len(trading_dates)}")
        # Run simulation
        print("\n🚀 Starting 2022 Torture Test...")
        print(f" Starting Capital: ${self.starting_capital:,.2f}")
        print(f" Max Drawdown Limit: 25% (${self.starting_capital * 0.75:,.2f})")
        print()

        for i, date in enumerate(trading_dates):
            # Mark the portfolio to market before any trading on this date
            portfolio_value = self._calculate_portfolio_value(data, date)
            self.equity_curve.append({
                "date": date,
                "value": portfolio_value
            })

            # Check circuit breaker (loss measured against starting capital)
            drawdown = (portfolio_value - self.starting_capital) / self.starting_capital
            if drawdown <= -0.25:
                print("\n🚨 CIRCUIT BREAKER TRIGGERED")
                print(f" Date: {date.date()}")
                print(f" Portfolio: ${portfolio_value:,.2f}")
                print(f" Drawdown: {drawdown:.1%}")
                print(" ❌ BACKTEST FAILED - Exceeded 25% drawdown limit")
                break

            # Trade each ticker (simplified - in production would use judge logic)
            for ticker in available:
                # Skip if we don't have enough history
                ticker_data = data[ticker].loc[:date]
                if len(ticker_data) < 100:
                    continue

                # Prepare market data
                market_data = self._prepare_market_data(ticker_data)
                # Create mock ground truth (in production, would use real fundamentals)
                ground_truth = self._create_mock_ground_truth(ticker_data)
                # Create mock LLM agents (simplified for testing)
                llm_agents = self._create_mock_agents(ticker, market_data, ground_truth)

                # Execute workflow; a failure on one ticker/date is reported
                # and the backtest carries on (deliberate best-effort).
                try:
                    decision, metrics = self.workflow.execute_trade_decision(
                        ticker=ticker,
                        trading_date=date.strftime("%Y-%m-%d"),
                        market_data=market_data,
                        ground_truth=ground_truth,
                        llm_agents=llm_agents
                    )
                    # Log regime
                    self.regime_log.append({
                        "date": date,
                        "ticker": ticker,
                        "regime": "UNKNOWN"  # Would extract from workflow
                    })
                    self._log_rejection(ticker, decision, date)
                    # Execute approved trades
                    if decision.action in [SignalType.BUY, SignalType.SELL] and decision.quantity > 0:
                        self._execute_trade(ticker, decision, market_data["close"], date)
                except Exception as e:
                    print(f" ⚠️ Error processing {ticker} on {date.date()}: {e}")

            # Progress update every 30 days
            if i % 30 == 0:
                print(f" {date.date()}: Portfolio = ${portfolio_value:,.2f} ({drawdown:+.1%})")

        # Calculate final metrics
        return self._calculate_metrics()

    def _log_rejection(self, ticker: str, decision, date) -> None:
        """Record a rejected decision under the first matching category.

        Order of precedence: failed fact check, failed risk gate, then a HOLD
        whose reasoning mentions both "REJECTED" and "JSON".
        """
        if not decision.fact_check_passed:
            self.rejections["fact_check"].append({
                "date": date,
                "ticker": ticker,
                "action": "N/A",
                "reason": decision.reasoning
            })
        elif not decision.risk_gate_passed:
            self.rejections["risk_gate"].append({
                "date": date,
                "ticker": ticker,
                "action": decision.action.value,
                "reason": decision.reasoning
            })
        elif decision.action == SignalType.HOLD:
            # Check if it's a dead state
            if "REJECTED" in decision.reasoning and "JSON" in decision.reasoning:
                self.rejections["json_compliance"].append({
                    "date": date,
                    "ticker": ticker,
                    "action": "N/A",
                    "reason": decision.reasoning
                })

    def _prepare_market_data(self, ticker_data: pd.DataFrame) -> Dict:
        """Prepare market data for workflow.

        Args:
            ticker_data: Price history up to and including the decision date;
                must contain a ``Close`` column.

        Returns:
            Dict with the close series, last close, an ATR proxy (1.5x the
            14-period rolling standard deviation of closes, or 1.0 with fewer
            than 14 rows), last volume (1000000 when there is no ``Volume``
            column) and placeholder indicators.
        """
        # Ensure Close is a Series, not DataFrame
        close_series = self._column(ticker_data, 'Close')
        if len(close_series) >= 14:
            atr = float(close_series.rolling(14).std().iloc[-1] * 1.5)
        else:
            atr = 1.0
        if 'Volume' in ticker_data:
            volume = float(self._column(ticker_data, 'Volume').iloc[-1])
        else:
            volume = 1000000
        return {
            "price_series": close_series,
            "close": float(close_series.iloc[-1]),
            "atr": atr,
            "volume": volume,
            "indicators": {
                "RSI": 50,  # Simplified
                "MACD": 0.0
            }
        }

    def _create_mock_ground_truth(self, ticker_data: pd.DataFrame) -> Dict:
        """Create mock ground truth (simplified).

        Both values are scalars derived from daily close-to-close returns:
        the mean of the last 20 returns scaled by 252, and the latest return.
        """
        # Reduce Close to a Series first so the results are scalars even when
        # the frame has MultiIndex columns (same handling as market data).
        returns = self._column(ticker_data, 'Close').pct_change()
        return {
            "revenue_growth_yoy": float(returns.tail(20).mean() * 252),  # Annualized
            "price_change_pct": float(returns.iloc[-1])
        }

    def _create_mock_agents(self, ticker: str, market_data: Dict, ground_truth: Dict):
        """Create mock LLM agents for testing.

        Returns:
            Dict of three callables (``market_analyst``, ``bull_researcher``,
            ``bear_researcher``). Each ignores its prompt and returns a Mock
            whose ``content`` is a fixed fenced-JSON reply. The arguments are
            currently unused.
        """
        # This is simplified - in production would use real LLMs
        from unittest.mock import Mock

        def canned_agent(content):
            """Build an agent callable that always answers with ``content``."""
            def agent(prompt):
                response = Mock()
                response.content = content
                return response
            return agent

        analyst_reply = '''```json
{
"analyst_type": "market",
"key_findings": ["Price movement observed", "Volume analysis complete", "Technical setup identified"],
"signal": "HOLD",
"confidence": 0.6,
"reasoning": "Market conditions require cautious approach during volatile period."
}
```'''
        bull_reply = '''```json
{
"researcher_type": "bull",
"key_arguments": ["Long-term growth potential remains", "Technical support holding"],
"signal": "BUY",
"confidence": 0.55,
"supporting_evidence": ["Historical patterns", "Sector strength"]
}
```'''
        bear_reply = '''```json
{
"researcher_type": "bear",
"key_arguments": ["Market volatility elevated", "Downside risks present"],
"signal": "SELL",
"confidence": 0.70,
"supporting_evidence": ["Macro headwinds", "Technical weakness"]
}
```'''
        return {
            "market_analyst": canned_agent(analyst_reply),
            "bull_researcher": canned_agent(bull_reply),
            "bear_researcher": canned_agent(bear_reply)
        }

    def _execute_trade(self, ticker: str, decision, price: float, date):
        """Record a trade and apply it to cash and positions.

        Args:
            ticker: Symbol traded.
            decision: Workflow decision; ``action`` and ``quantity`` are read.
            price: Execution price per share.
            date: Trading date recorded with the trade.
        """
        quantity = decision.quantity
        notional = quantity * price
        self.trades.append({
            "date": date,
            "ticker": ticker,
            "action": decision.action.value,
            "quantity": quantity,
            "price": price,
            "value": notional
        })

        # Without this bookkeeping the equity curve stays flat and every
        # drawdown check passes trivially.
        # NOTE(review): a SELL larger than the held quantity results in a
        # short position - confirm shorting is intended for this backtest.
        side = self._signal_name(decision.action)
        held = self.positions.get(ticker, 0)
        if side == "BUY":
            self.capital -= notional
            self.positions[ticker] = held + quantity
        elif side == "SELL":
            self.capital += notional
            self.positions[ticker] = held - quantity
        self._last_price[ticker] = price

    def _calculate_portfolio_value(self, data: Dict, date) -> float:
        """Calculate current portfolio value (cash plus marked positions).

        Each open position is valued at the latest non-NaN close on or before
        ``date``; if the price data has none, the last execution price for
        that ticker is used. With no open positions this is just the cash
        balance.
        """
        total = self.capital
        for ticker, quantity in self.positions.items():
            if not quantity:
                continue
            price = None
            frame = data.get(ticker)
            if frame is not None:
                history = self._column(frame, 'Close').loc[:date].dropna()
                if len(history) > 0:
                    price = float(history.iloc[-1])
            if price is None:
                price = self._last_price.get(ticker, 0.0)
            total += quantity * price
        return total

    def _calculate_metrics(self) -> Dict:
        """Calculate backtest metrics.

        Returns:
            Dict with final value, total return versus starting capital,
            peak-to-trough max drawdown (<= 0), annualized Sharpe ratio
            (sqrt(252) scaling, 0.0 when undefined), trade and rejection
            counts, and the equity curve frame. With an empty equity curve the
            final value is the starting capital and the ratios are 0.0.
        """
        if self.equity_curve:
            equity_df = pd.DataFrame(self.equity_curve)
            values = equity_df['value']
            final_value = float(values.iloc[-1])
            returns = values.pct_change().dropna()
            # Max drawdown
            running_peak = values.cummax()
            max_drawdown = float(((values - running_peak) / running_peak).min())
            # Sharpe ratio (annualized)
            if len(returns) > 0 and returns.std() > 0:
                sharpe = float((returns.mean() / returns.std()) * np.sqrt(252))
            else:
                sharpe = 0.0
        else:
            # Nothing was simulated: report a neutral result instead of
            # failing on the missing 'value' column.
            equity_df = pd.DataFrame(columns=["date", "value"])
            final_value = self.starting_capital
            max_drawdown = 0.0
            sharpe = 0.0
        return {
            "final_value": final_value,
            "total_return": (final_value - self.starting_capital) / self.starting_capital,
            "max_drawdown": max_drawdown,
            "sharpe_ratio": sharpe,
            "total_trades": len(self.trades),
            "fact_check_rejections": len(self.rejections["fact_check"]),
            "risk_gate_rejections": len(self.rejections["risk_gate"]),
            "json_failures": len(self.rejections["json_compliance"]),
            "equity_curve": equity_df
        }
# Run the torture test
def main() -> int:
    """Run the 2022 torture test, print the report and return an exit status.

    Returns:
        0 when both pass criteria hold (max drawdown better than -25% and at
        least one fact-check rejection), 1 otherwise, so that automation can
        detect a failed run instead of relying on the printed text.
    """
    backtest = TortureTestBacktest(starting_capital=100000)
    results = backtest.run_backtest(
        tickers=["AAPL", "NVDA", "AMZN"],
        start_date="2022-01-01",
        end_date="2022-12-31"
    )
    print("\n" + "="*80)
    print("2022 TORTURE TEST RESULTS")
    print("="*80)
    print(f"\nFinal Portfolio Value: ${results['final_value']:,.2f}")
    print(f"Total Return: {results['total_return']:.1%}")
    print(f"Max Drawdown: {results['max_drawdown']:.1%}")
    print(f"Sharpe Ratio: {results['sharpe_ratio']:.2f}")
    print(f"\nTotal Trades: {results['total_trades']}")
    print(f"Fact Check Rejections: {results['fact_check_rejections']}")
    print(f"Risk Gate Rejections: {results['risk_gate_rejections']}")
    # The metrics dict also carries JSON compliance failures; report them too.
    print(f"JSON Compliance Failures: {results['json_failures']}")

    # Pass/Fail
    print("\n" + "="*80)
    drawdown_ok = results['max_drawdown'] > -0.25
    if drawdown_ok:
        print("✅ PASSED: Max drawdown < 25%")
    else:
        print("❌ FAILED: Max drawdown exceeded 25% limit")

    fact_check_ok = results['fact_check_rejections'] > 0
    if fact_check_ok:
        print(f"✅ PASSED: Fact checker active ({results['fact_check_rejections']} rejections)")
    else:
        print("❌ FAILED: Fact checker rejected 0 trades (threshold too loose)")

    return 0 if (drawdown_ok and fact_check_ok) else 1


if __name__ == "__main__":
    sys.exit(main())