# TradingAgents/tests/ignition_tests.py
# (original listing: 389 lines, 14 KiB, Python)

"""
Phase 7: Ignition Tests - Prove the System Works
Three isolated tests:
1. Hallucination Trap - Fact checker must reject "500% revenue growth" lie
2. Falling Knife - Regime detector must prevent buying NVDA crash (Jan 27, 2022)
3. Live Round - System must execute actual trade during March 2022 rally
"""
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from tradingagents.workflows.integrated_workflow import IntegratedTradingWorkflow
from tradingagents.schemas.agent_schemas import SignalType
from tradingagents.engines.regime_detector import RegimeDetector
from unittest.mock import Mock
class IgnitionTests:
    """
    Phase 7: Ignition Tests

    Three smoke checks run against ``IntegratedTradingWorkflow`` and
    ``RegimeDetector``. The LLM agents are replaced by ``Mock`` objects that
    return canned JSON; everything downstream of the agents is whatever the
    workflow/detector instances actually do.

    Each ``test_*`` method prints a human-readable report and returns
    ``True`` (pass) or ``False`` (fail) instead of asserting, so the
    ``__main__`` runner can tabulate the outcomes.
    """

    # Fixed seed for the synthetic price walk in test 1, so every run feeds
    # the workflow the same market data.
    _PRICE_SEED = 7

    def __init__(self):
        """Initialize test harness (workflow + regime detector)."""
        self.config = {
            "anonymizer_seed": "ignition_test",
            "use_nli_model": False,  # Use fallback for speed
            "max_json_retries": 2,
            "fact_check_latency_budget": 2.0,
            "portfolio_value": 100000,
            "risk_config": {
                "max_position_risk": 0.02,
                "max_portfolio_heat": 0.10,
                "circuit_breaker": 0.15
            }
        }
        self.workflow = IntegratedTradingWorkflow(self.config)
        self.regime_detector = RegimeDetector()

    @staticmethod
    def _banner(title):
        """Print the 80-column section header used by every test."""
        print("\n" + "="*80)
        print(title)
        print("="*80)

    @staticmethod
    def _make_agent(content):
        """Return a callable(prompt) whose result exposes *content* as ``.content``."""
        def agent(prompt):
            response = Mock()
            response.content = content
            return response
        return agent

    @staticmethod
    def _download_close(ticker, start, end, as_of):
        """
        Download daily data for *ticker* and return the 'Close' series
        truncated at *as_of* (inclusive).

        Returns:
            A ``pd.Series`` of closes, or ``None`` when nothing usable
            came back from the download.
        """
        data = yf.download(ticker, start=start, end=end, progress=False)
        if data is None or data.empty:
            return None
        close = data.loc[:pd.Timestamp(as_of)]['Close']
        if isinstance(close, pd.DataFrame):
            # 'Close' can come back as a one-column frame. Select the column
            # explicitly: squeeze() would collapse a single-row frame to a
            # scalar and break the .iloc access in the callers.
            close = close.iloc[:, 0]
        return None if close.empty else close

    def test_1_hallucination_trap(self):
        """
        TEST 1: HALLUCINATION TRAP
        Inject: "Apple revenue grew 500% last quarter"
        Ground Truth: Revenue grew 8%
        Expected: FACT_CHECK_FAILURE

        Returns:
            True when the returned decision has ``fact_check_passed`` falsy,
            False otherwise.
        """
        self._banner("TEST 1: HALLUCINATION TRAP")
        print("\n🎯 Objective: Prove fact checker rejects obvious hallucination")
        print(" Injection: 'Apple revenue grew 500% last quarter'")
        print(" Ground Truth: Revenue grew 8%")
        print(" Expected: 🚫 REJECTED - FACT_CHECK_FAILURE\n")

        # CRITICAL: every canned reply is valid JSON without markdown blocks.
        analyst = self._make_agent('''{
            "analyst_type": "market",
            "key_findings": ["Strong momentum", "Volume increasing", "Breakout pattern"],
            "signal": "BUY",
            "confidence": 0.75,
            "reasoning": "Technical setup looks bullish with strong volume confirmation and breakout above resistance."
        }''')
        # INJECTED HALLUCINATION: well-formed JSON whose first argument is a lie.
        bull_hallucination = self._make_agent('''{
            "researcher_type": "bull",
            "key_arguments": [
                "Apple revenue grew 500% last quarter, signaling massive adoption",
                "Earnings beat expectations significantly"
            ],
            "signal": "BUY",
            "confidence": 0.99,
            "supporting_evidence": ["Q4 earnings", "Market share gains"]
        }''')
        bear = self._make_agent('''{
            "researcher_type": "bear",
            "key_arguments": [
                "Valuation stretched at current levels",
                "Competition intensifying in key markets"
            ],
            "signal": "HOLD",
            "confidence": 0.60,
            "supporting_evidence": ["P/E ratio elevated", "Market dynamics shifting"]
        }''')

        # Ground truth: Revenue actually grew 8%
        ground_truth = {
            "revenue_growth_yoy": 0.08,  # 8% growth
            "price_change_pct": 0.02
        }

        # Synthetic market data. A locally seeded generator keeps the random
        # walk identical across runs without touching numpy's global state.
        dates = pd.date_range('2022-01-01', periods=100, freq='D')
        rng = np.random.default_rng(self._PRICE_SEED)
        prices = pd.Series(150 + np.cumsum(rng.standard_normal(100) * 0.5), index=dates)
        market_data = {
            "price_series": prices,
            "close": 155.0,
            "atr": 2.5,
            "volume": 50000000,
            "indicators": {"RSI": 55, "MACD": 0.5}
        }
        llm_agents = {
            "market_analyst": analyst,
            "bull_researcher": bull_hallucination,  # HALLUCINATION INJECTED
            "bear_researcher": bear
        }

        # Execute workflow (the metrics half of the result is not used here)
        decision, _ = self.workflow.execute_trade_decision(
            ticker="AAPL",
            trading_date="2022-01-15",
            market_data=market_data,
            ground_truth=ground_truth,
            llm_agents=llm_agents
        )

        # Validate result
        print("\n📋 RESULT:")
        print(f" Decision: {decision.action.value}")
        print(f" Fact Check Passed: {decision.fact_check_passed}")
        print(f" Reasoning: {decision.reasoning}")
        if decision.fact_check_passed:
            print("\n❌ TEST 1 FAILED: Fact checker approved hallucination!")
            print(" This is a CRITICAL FAILURE - system validated a 500% lie")
            return False
        print("\n✅ TEST 1 PASSED: Fact checker rejected hallucination")
        print(f" Rejection: {decision.reasoning}")
        return True

    def test_2_falling_knife(self):
        """
        TEST 2: FALLING KNIFE
        Date: January 27, 2022 (NVDA crash)
        Expected: Regime = BEAR/VOLATILE

        Only the regime detector is exercised here; no trade signal is
        produced or checked by this method.

        Returns:
            True when the detected regime's value is "trending_down" or
            "volatile"; False otherwise or when no data could be downloaded.
        """
        self._banner("TEST 2: FALLING KNIFE DETECTION")
        print("\n🎯 Objective: Prove system won't buy a falling knife")
        print(" Date: January 27, 2022 (NVDA -3.6% crash)")
        print(" RSI: < 30 (oversold)")
        print(" Expected: Regime = VOLATILE/BEAR, Signal = HOLD\n")

        # CRITICAL: start well before the crash date so indicators can warm up.
        print("📥 Downloading NVDA data for January 2022 (with 100-day warm-up buffer)...")
        close_series = self._download_close(
            "NVDA", start="2021-10-01", end="2022-02-01", as_of="2022-01-27"
        )
        if close_series is None:
            print("❌ Failed to download data")
            return False

        print(f" Data points: {len(close_series)}")
        print(f" Price on Jan 27: ${close_series.iloc[-1]:.2f}")
        # The 5-day comparison needs six rows; skip it on a short download
        # rather than raising IndexError before the detector even runs.
        if len(close_series) >= 6:
            print(f" Price 5 days ago: ${close_series.iloc[-6]:.2f}")
            print(f" 5-day change: {((close_series.iloc[-1] / close_series.iloc[-6]) - 1) * 100:.1f}%")

        # Detect regime
        print("\n🔬 Running regime detection...")
        regime, metrics = self.regime_detector.detect_regime(close_series, window=60)
        print("\n📊 REGIME DETECTION RESULT:")
        print(f" Regime: {regime.value.upper()}")
        print(f" Volatility: {metrics['volatility']:.1%}")
        print(f" Trend Strength (ADX): {metrics['trend_strength']:.1f}")
        print(f" Cumulative Return: {metrics['cumulative_return']:.1%}")
        print(f" Hurst Exponent: {metrics['hurst_exponent']:.2f}")

        # NOTE(review): these literals are presumably the string values of the
        # detector's regime enum -- confirm against RegimeDetector.
        if regime.value in ("trending_down", "volatile"):
            print(f"\n✅ TEST 2 PASSED: Regime correctly identified as {regime.value.upper()}")
            print(" System should NOT buy the dip in this regime")
            return True
        print(f"\n❌ TEST 2 FAILED: Regime classified as {regime.value.upper()}")
        print(" This is DANGEROUS - system might buy a falling knife")
        return False

    def test_3_live_round(self):
        """
        TEST 3: LIVE ROUND
        Date: March 15, 2022 (Relief rally)
        Action: Allow system to trade normally
        Expected: Successfully execute a BUY trade

        Returns:
            True when the decision's action is ``SignalType.BUY`` with a
            positive quantity; False otherwise or when no data could be
            downloaded.
        """
        self._banner("TEST 3: LIVE ROUND (TRADE EXECUTION)")
        print("\n🎯 Objective: Prove system can execute actual trade")
        print(" Date: March 15, 2022 (Relief rally)")
        print(" Expected: Successfully BUY a position\n")

        # CRITICAL: start well before the trade date so indicators can warm up.
        print("📥 Downloading AAPL data for March 2022 (with 100-day warm-up buffer)...")
        close_series = self._download_close(
            "AAPL", start="2021-11-01", end="2022-03-20", as_of="2022-03-15"
        )
        if close_series is None:
            print("❌ Failed to download data")
            return False
        print(f" Data points: {len(close_series)}")
        print(f" Price on Mar 15: ${close_series.iloc[-1]:.2f}")

        # Bullish mock agents. Replies are plain JSON with no markdown code
        # fences, matching the format test 1 marks as CRITICAL.
        analyst = self._make_agent('''{
            "analyst_type": "market",
            "key_findings": ["Relief rally underway", "Oversold bounce", "Volume confirming"],
            "signal": "BUY",
            "confidence": 0.70,
            "reasoning": "Technical bounce from oversold levels with volume."
        }''')
        bull = self._make_agent('''{
            "researcher_type": "bull",
            "key_arguments": [
                "Market finding support after selloff",
                "Technical indicators showing reversal"
            ],
            "signal": "BUY",
            "confidence": 0.75,
            "supporting_evidence": ["RSI bounce", "Volume spike"]
        }''')
        bear = self._make_agent('''{
            "researcher_type": "bear",
            "key_arguments": [
                "Rally may be short-lived",
                "Macro headwinds persist"
            ],
            "signal": "HOLD",
            "confidence": 0.55,
            "supporting_evidence": ["Fed policy", "Inflation"]
        }''')

        # Ground truth: last daily return as a builtin float, like the other
        # numeric fields handed to the workflow.
        returns = close_series.pct_change()
        ground_truth = {
            "revenue_growth_yoy": 0.05,
            "price_change_pct": float(returns.iloc[-1])
        }

        # Market data. "atr" is a stand-in: 1.5x the 14-day rolling standard
        # deviation of closes, since only the close series is kept.
        market_data = {
            "price_series": close_series,
            "close": float(close_series.iloc[-1]),
            "atr": float(close_series.rolling(14).std().iloc[-1] * 1.5),
            "volume": 50000000,
            "indicators": {"RSI": 45, "MACD": 0.3}
        }
        llm_agents = {
            "market_analyst": analyst,
            "bull_researcher": bull,
            "bear_researcher": bear
        }

        # Execute workflow (the metrics half of the result is not used here)
        print("\n🚀 Executing trade decision...")
        decision, _ = self.workflow.execute_trade_decision(
            ticker="AAPL",
            trading_date="2022-03-15",
            market_data=market_data,
            ground_truth=ground_truth,
            llm_agents=llm_agents
        )

        # Validate result
        print("\n📋 RESULT:")
        print(f" Action: {decision.action.value}")
        print(f" Quantity: {decision.quantity}")
        print(f" Confidence: {decision.confidence:.2f}")
        print(f" Fact Check Passed: {decision.fact_check_passed}")
        print(f" Risk Gate Passed: {decision.risk_gate_passed}")
        if decision.action == SignalType.BUY and decision.quantity > 0:
            print("\n✅ TEST 3 PASSED: Successfully executed BUY trade")
            print(f" Quantity: {decision.quantity} shares")
            print(f" Stop Loss: ${decision.stop_loss:.2f}")
            print(f" Risk: {decision.risk_pct:.2%}")
            return True
        print("\n❌ TEST 3 FAILED: Could not execute trade")
        print(f" Reasoning: {decision.reasoning}")
        return False
# Run ignition tests
def summarize_results(results):
    """Print one PASS/FAIL line per test followed by the overall verdict.

    Args:
        results: Mapping of test name to its boolean outcome.

    Returns:
        True when every outcome is truthy, otherwise False.
    """
    print("\n" + "="*80)
    print("IGNITION TEST SUMMARY")
    print("="*80)
    for test_name, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{test_name}: {status}")
    all_passed = all(results.values())
    print("\n" + "="*80)
    if all_passed:
        print("✅ ALL IGNITION TESTS PASSED")
        print(" System is ready for live trading")
    else:
        print("❌ IGNITION TESTS FAILED")
        print(" System is NOT ready for production")
    print("="*80)
    return all_passed


if __name__ == "__main__":
    print("\n" + "="*80)
    print("PHASE 7: IGNITION TESTS")
    print("="*80)
    print("\nProving the system works with real logic, not mocks.\n")
    tests = IgnitionTests()
    # Run all three tests
    results = {
        "test_1_hallucination": tests.test_1_hallucination_trap(),
        "test_2_falling_knife": tests.test_2_falling_knife(),
        "test_3_live_round": tests.test_3_live_round()
    }
    # Exit non-zero on any failure so a shell or CI job can gate on the
    # outcome instead of parsing the printed summary.
    sys.exit(0 if summarize_results(results) else 1)