375 lines
14 KiB
Python
375 lines
14 KiB
Python
"""
|
|
2022 Torture Test - Bear Market Backtest
|
|
|
|
Tests system performance during the 2022 tech crash:
|
|
- NVDA: -50%+
|
|
- AMZN: -50%
|
|
- AAPL: -27%
|
|
|
|
Pass Criteria:
|
|
- Max Drawdown < 25% (better than Nasdaq-100's -33%)
|
|
- Fact checker must reject bullish hallucinations
|
|
- Regime detector must identify BEAR/VOLATILE periods
|
|
"""
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
import yfinance as yf
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Tuple
|
|
import sys
|
|
import os
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
|
|
from tradingagents.workflows.integrated_workflow import IntegratedTradingWorkflow
|
|
from tradingagents.schemas.agent_schemas import SignalType
|
|
|
|
|
|
class TortureTestBacktest:
    """
    2022 Bear Market Backtest.

    Tests if system can survive the tech crash with:
    - Regime detection (should detect BEAR/VOLATILE)
    - Fact checker (should reject bullish hallucinations)
    - Risk gate (should enforce circuit breakers)

    The backtest keeps simple cash accounting: ``capital`` is the cash
    balance and ``positions`` maps ticker -> signed share quantity (negative
    means short). Portfolio value is cash plus positions marked to the latest
    close at or before the valuation date.
    """

    # Loss (relative to starting capital) at which the run is aborted.
    MAX_DRAWDOWN_LIMIT = 0.25
    # Minimum number of price rows a ticker needs before it is traded.
    MIN_HISTORY_DAYS = 100

    def __init__(self, starting_capital: float = 100000):
        """Initialize backtest state and build the trading workflow.

        Args:
            starting_capital: Initial cash balance of the simulated account.
        """
        self.starting_capital = starting_capital
        self.capital = starting_capital  # cash balance
        self.positions = {}  # ticker -> signed share quantity
        self.equity_curve = []  # one {"date", "value"} dict per simulated day
        self.trades = []
        self.rejections = {
            "fact_check": [],
            "risk_gate": [],
            "json_compliance": [],
        }
        self.regime_log = []

        # Configure workflow
        config = {
            "anonymizer_seed": "torture_test_2022",
            "use_nli_model": False,  # Use fallback for speed
            "max_json_retries": 2,
            "fact_check_latency_budget": 2.0,
            "portfolio_value": starting_capital,
            "risk_config": {
                "max_position_risk": 0.02,  # 2% max risk per trade
                "max_portfolio_heat": 0.10,  # 10% max total portfolio risk
                "circuit_breaker": 0.15,  # Stop trading if 15% drawdown
            },
        }

        self.workflow = IntegratedTradingWorkflow(config)

    def download_data(self, tickers: List[str], start_date: str, end_date: str) -> Dict[str, pd.DataFrame]:
        """Download historical data for tickers.

        Args:
            tickers: Symbols to fetch, one ``yf.download`` call each.
            start_date: Start date YYYY-MM-DD.
            end_date: End date YYYY-MM-DD.

        Returns:
            Mapping of ticker -> price DataFrame. Tickers for which the
            download returned no rows are omitted.
        """
        print(f"📥 Downloading data for {tickers} from {start_date} to {end_date}...")

        data = {}
        for ticker in tickers:
            df = yf.download(ticker, start=start_date, end=end_date, progress=False)
            if len(df) > 0:
                data[ticker] = df
                print(f" ✅ {ticker}: {len(df)} days")
            else:
                print(f" ❌ {ticker}: No data")

        return data

    def run_backtest(
        self,
        tickers: List[str],
        start_date: str,
        end_date: str
    ) -> Dict:
        """
        Run 2022 torture test backtest.

        Args:
            tickers: List of tickers to trade
            start_date: Start date YYYY-MM-DD
            end_date: End date YYYY-MM-DD

        Returns:
            Results dict with metrics (see ``_calculate_metrics``).

        Raises:
            ValueError: If no ticker returned data, or the tickers that did
                return data share no common trading date.
        """
        # Download data
        data = self.download_data(tickers, start_date, end_date)

        if not data:
            raise ValueError("No data downloaded")

        # download_data drops tickers without rows, so only work with the
        # ones that are actually present (indexing data[tickers[0]] directly
        # would raise KeyError when the first ticker failed to download).
        active_tickers = [ticker for ticker in tickers if ticker in data]

        # Get trading dates (intersection of all tickers)
        trading_dates = self._common_trading_dates(data, active_tickers)
        if not trading_dates:
            raise ValueError("No overlapping trading dates across tickers")

        print(f"\n📅 Trading period: {trading_dates[0].date()} to {trading_dates[-1].date()}")
        print(f" Total trading days: {len(trading_dates)}")

        # Run simulation
        limit = self.MAX_DRAWDOWN_LIMIT
        print("\n🚀 Starting 2022 Torture Test...")
        print(f" Starting Capital: ${self.starting_capital:,.2f}")
        print(f" Max Drawdown Limit: {limit:.0%} (${self.starting_capital * (1 - limit):,.2f})")
        print()

        for i, date in enumerate(trading_dates):
            # Calculate current portfolio value
            portfolio_value = self._calculate_portfolio_value(data, date)
            self.equity_curve.append({
                "date": date,
                "value": portfolio_value
            })

            # Check circuit breaker (loss measured against starting capital)
            drawdown = (portfolio_value - self.starting_capital) / self.starting_capital

            if drawdown <= -limit:
                print("\n🚨 CIRCUIT BREAKER TRIGGERED")
                print(f" Date: {date.date()}")
                print(f" Portfolio: ${portfolio_value:,.2f}")
                print(f" Drawdown: {drawdown:.1%}")
                print(f" ❌ BACKTEST FAILED - Exceeded {limit:.0%} drawdown limit")
                break

            # Trade each ticker (simplified - in production would use judge logic)
            for ticker in active_tickers:
                self._process_ticker(ticker, data[ticker], date)

            # Progress update every 30 days
            if i % 30 == 0:
                print(f" {date.date()}: Portfolio = ${portfolio_value:,.2f} ({drawdown:+.1%})")

        # Calculate final metrics
        return self._calculate_metrics()

    @staticmethod
    def _common_trading_dates(data: Dict[str, pd.DataFrame], tickers: List[str]) -> List:
        """Return the sorted index values shared by every ticker in *tickers*."""
        if not tickers:
            return []
        common = set(data[tickers[0]].index)
        for ticker in tickers[1:]:
            common &= set(data[ticker].index)
        return sorted(common)

    @staticmethod
    def _column_as_series(frame: pd.DataFrame, column: str) -> pd.Series:
        """Return ``frame[column]`` as a Series.

        Selecting a column can yield a one-column DataFrame instead of a
        Series (e.g. when the frame has two-level columns); in that case the
        first sub-column is used so callers always get scalar elements.
        """
        selected = frame[column]
        if isinstance(selected, pd.DataFrame):
            selected = selected.iloc[:, 0]
        return selected

    def _process_ticker(self, ticker: str, frame: pd.DataFrame, date) -> None:
        """Run the workflow for one ticker on one date and act on its decision."""
        # Skip if we don't have enough history
        ticker_data = frame.loc[:date]
        if len(ticker_data) < self.MIN_HISTORY_DAYS:
            return

        # Prepare market data
        market_data = self._prepare_market_data(ticker_data)

        # Create mock ground truth (in production, would use real fundamentals)
        ground_truth = self._create_mock_ground_truth(ticker_data)

        # Create mock LLM agents (simplified for testing)
        llm_agents = self._create_mock_agents(ticker, market_data, ground_truth)

        # Execute workflow; a failure on one ticker/day is reported and the
        # simulation carries on (deliberate best-effort).
        try:
            decision, _metrics = self.workflow.execute_trade_decision(
                ticker=ticker,
                trading_date=date.strftime("%Y-%m-%d"),
                market_data=market_data,
                ground_truth=ground_truth,
                llm_agents=llm_agents
            )

            # Log regime
            self.regime_log.append({
                "date": date,
                "ticker": ticker,
                "regime": "UNKNOWN"  # Would extract from workflow
            })

            rejected = self._record_rejection(decision, ticker, date)

            # Execute approved trades only: a decision that failed the fact
            # check or the risk gate must never reach the book.
            if (
                not rejected
                and decision.action in (SignalType.BUY, SignalType.SELL)
                and decision.quantity > 0
            ):
                self._execute_trade(ticker, decision, market_data["close"], date)

        except Exception as e:
            print(f" ⚠️ Error processing {ticker} on {date.date()}: {e}")

    def _record_rejection(self, decision, ticker: str, date) -> bool:
        """Log *decision* in ``self.rejections`` if it was rejected.

        Returns:
            True when the decision was logged as a fact-check, risk-gate or
            JSON-compliance rejection, False otherwise.
        """
        if not decision.fact_check_passed:
            bucket, action = "fact_check", "N/A"
        elif not decision.risk_gate_passed:
            bucket, action = "risk_gate", decision.action.value
        elif (
            decision.action == SignalType.HOLD
            and "REJECTED" in decision.reasoning
            and "JSON" in decision.reasoning
        ):
            # HOLD produced by a dead state after JSON validation failed
            bucket, action = "json_compliance", "N/A"
        else:
            return False

        self.rejections[bucket].append({
            "date": date,
            "ticker": ticker,
            "action": action,
            "reason": decision.reasoning
        })
        return True

    def _prepare_market_data(self, ticker_data: pd.DataFrame) -> Dict:
        """Prepare market data for workflow.

        Args:
            ticker_data: Price history up to and including the trading date;
                must contain a ``Close`` column and may contain ``Volume``.

        Returns:
            Dict with the close series, last close, an ATR proxy (1.5x the
            14-row rolling standard deviation of closes, or 1.0 with fewer
            than 14 rows), last volume (1000000 if the column is absent) and
            placeholder indicators.
        """
        # Ensure Close is a Series, not DataFrame
        close_series = self._column_as_series(ticker_data, 'Close')

        if len(close_series) >= 14:
            atr = float(close_series.rolling(14).std().iloc[-1] * 1.5)
        else:
            atr = 1.0

        if 'Volume' in ticker_data:
            volume = float(self._column_as_series(ticker_data, 'Volume').iloc[-1])
        else:
            volume = 1000000

        return {
            "price_series": close_series,
            "close": float(close_series.iloc[-1]),
            "atr": atr,
            "volume": volume,
            "indicators": {
                "RSI": 50,  # Simplified
                "MACD": 0.0
            }
        }

    def _create_mock_ground_truth(self, ticker_data: pd.DataFrame) -> Dict:
        """Create mock ground truth (simplified).

        Both values are derived from daily close-to-close returns: the mean
        of the last 20 returns scaled by 252 stands in for revenue growth,
        and the latest return is the price change.
        """
        # Same Series normalization as _prepare_market_data, so the values
        # below are scalars rather than one-element Series.
        returns = self._column_as_series(ticker_data, 'Close').pct_change()

        return {
            "revenue_growth_yoy": float(returns.tail(20).mean() * 252),  # Annualized
            "price_change_pct": float(returns.iloc[-1])
        }

    def _create_mock_agents(self, ticker: str, market_data: Dict, ground_truth: Dict):
        """Create mock LLM agents for testing.

        Returns:
            Dict of agent name -> callable. Each callable ignores its prompt
            and returns a ``Mock`` whose ``content`` is a fixed JSON payload
            wrapped in a json code fence (analyst: HOLD, bull: BUY,
            bear: SELL).
        """
        # This is simplified - in production would use real LLMs
        import json
        from unittest.mock import Mock

        def make_agent(payload: Dict):
            """Build an agent that always answers with *payload* as fenced JSON."""
            content = "```json\n" + json.dumps(payload, indent=4) + "\n```"

            def agent(prompt):
                response = Mock()
                response.content = content
                return response

            return agent

        return {
            "market_analyst": make_agent({
                "analyst_type": "market",
                "key_findings": ["Price movement observed", "Volume analysis complete", "Technical setup identified"],
                "signal": "HOLD",
                "confidence": 0.6,
                "reasoning": "Market conditions require cautious approach during volatile period."
            }),
            "bull_researcher": make_agent({
                "researcher_type": "bull",
                "key_arguments": ["Long-term growth potential remains", "Technical support holding"],
                "signal": "BUY",
                "confidence": 0.55,
                "supporting_evidence": ["Historical patterns", "Sector strength"]
            }),
            "bear_researcher": make_agent({
                "researcher_type": "bear",
                "key_arguments": ["Market volatility elevated", "Downside risks present"],
                "signal": "SELL",
                "confidence": 0.70,
                "supporting_evidence": ["Macro headwinds", "Technical weakness"]
            }),
        }

    def _execute_trade(self, ticker: str, decision, price: float, date):
        """Execute trade: record it and update cash and position.

        A BUY adds ``decision.quantity`` shares and spends cash; a SELL
        removes shares (going short if necessary) and receives cash. Any
        other action is recorded but leaves the book untouched.
        """
        quantity = decision.quantity
        notional = quantity * price

        self.trades.append({
            "date": date,
            "ticker": ticker,
            "action": decision.action.value,
            "quantity": quantity,
            "price": price,
            "value": notional
        })

        # Direction is taken from the enum member name (BUY / SELL).
        side = getattr(decision.action, "name", None)
        if side == "BUY":
            signed_quantity = quantity
        elif side == "SELL":
            signed_quantity = -quantity
        else:
            return

        self.capital -= signed_quantity * price
        new_position = self.positions.get(ticker, 0) + signed_quantity
        if new_position:
            self.positions[ticker] = new_position
        else:
            # Flat again: drop the entry so positions only lists open ones.
            self.positions.pop(ticker, None)

    def _calculate_portfolio_value(self, data: Dict, date) -> float:
        """Calculate current portfolio value.

        Returns:
            Cash plus every open position valued at its latest close at or
            before *date*. With no open positions this is just ``capital``.
        """
        value = self.capital
        for ticker, quantity in self.positions.items():
            closes = self._column_as_series(data[ticker], 'Close').loc[:date]
            if closes.empty:
                # No price yet for this date; nothing to mark against.
                continue
            value += quantity * float(closes.iloc[-1])
        return value

    def _calculate_metrics(self) -> Dict:
        """Calculate backtest metrics.

        Returns:
            Dict with final value, total return, max drawdown (most negative
            peak-to-trough fraction), annualized Sharpe ratio, trade and
            rejection counts, and the equity curve DataFrame. If no day was
            simulated, neutral values based on the starting capital are
            returned with an empty equity curve.
        """
        equity_df = pd.DataFrame(self.equity_curve, columns=["date", "value"])

        if equity_df.empty:
            final_value = float(self.starting_capital)
            max_drawdown = 0.0
            sharpe = 0.0
        else:
            values = equity_df['value']
            final_value = float(values.iloc[-1])
            returns = values.pct_change().dropna()

            # Max drawdown
            cummax = values.cummax()
            max_drawdown = float(((values - cummax) / cummax).min())

            # Sharpe ratio (annualized); a NaN std compares False and
            # falls through to 0.0
            if len(returns) > 0 and returns.std() > 0:
                sharpe = float((returns.mean() / returns.std()) * np.sqrt(252))
            else:
                sharpe = 0.0

        return {
            "final_value": final_value,
            "total_return": (final_value - self.starting_capital) / self.starting_capital,
            "max_drawdown": max_drawdown,
            "sharpe_ratio": sharpe,
            "total_trades": len(self.trades),
            "fact_check_rejections": len(self.rejections["fact_check"]),
            "risk_gate_rejections": len(self.rejections["risk_gate"]),
            "json_failures": len(self.rejections["json_compliance"]),
            "equity_curve": equity_df
        }
|
|
|
|
|
|
# Run the torture test
|
|
if __name__ == "__main__":
    # Script entry point: run the 2022 backtest on three large-cap tech
    # names and print a summary followed by the pass/fail verdicts.
    runner = TortureTestBacktest(starting_capital=100000)
    summary = runner.run_backtest(
        tickers=["AAPL", "NVDA", "AMZN"],
        start_date="2022-01-01",
        end_date="2022-12-31",
    )

    banner = "=" * 80
    worst_drawdown = summary['max_drawdown']
    fact_rejections = summary['fact_check_rejections']

    # Headline numbers
    print("\n" + banner)
    print("2022 TORTURE TEST RESULTS")
    print(banner)
    print(f"\nFinal Portfolio Value: ${summary['final_value']:,.2f}")
    print(f"Total Return: {summary['total_return']:.1%}")
    print(f"Max Drawdown: {worst_drawdown:.1%}")
    print(f"Sharpe Ratio: {summary['sharpe_ratio']:.2f}")
    print(f"\nTotal Trades: {summary['total_trades']}")
    print(f"Fact Check Rejections: {fact_rejections}")
    print(f"Risk Gate Rejections: {summary['risk_gate_rejections']}")

    # Pass/Fail: drawdown is a negative fraction, so "better than -25%"
    # means strictly greater than -0.25.
    print("\n" + banner)
    drawdown_verdict = (
        "✅ PASSED: Max drawdown < 25%"
        if worst_drawdown > -0.25
        else "❌ FAILED: Max drawdown exceeded 25% limit"
    )
    print(drawdown_verdict)

    # The fact checker counts as active only if it rejected something.
    if fact_rejections > 0:
        print(f"✅ PASSED: Fact checker active ({fact_rejections} rejections)")
    else:
        print("❌ FAILED: Fact checker rejected 0 trades (threshold too loose)")
|