"""
Phase 7: Ignition Tests - Prove the System Works

Three isolated tests:
1. Hallucination Trap - Fact checker must reject "500% revenue growth" lie
2. Falling Knife - Regime detector must prevent buying NVDA crash (Jan 27, 2022)
3. Live Round - System must execute actual trade during March 2022 rally
"""

import json
import os
import sys
from datetime import datetime
from unittest.mock import Mock

import numpy as np
import pandas as pd
import yfinance as yf

# Make the repository root importable when this file is run as a script.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from tradingagents.workflows.integrated_workflow import IntegratedTradingWorkflow
from tradingagents.schemas.agent_schemas import SignalType
from tradingagents.engines.regime_detector import RegimeDetector

# Width of the "=====" separator lines printed around section titles.
_BANNER_WIDTH = 80

# Fixed seed so the synthetic price path in test 1 is identical on every run.
_SYNTHETIC_PRICE_SEED = 7


def _banner(title):
    """Print *title* framed by separator lines, preceded by a blank line."""
    print("\n" + "=" * _BANNER_WIDTH)
    print(title)
    print("=" * _BANNER_WIDTH)


def _mock_agent(payload, *, fenced=False):
    """Return a stand-in LLM agent that always answers with *payload*.

    Args:
        payload: JSON-serialisable dict the agent "responds" with.
        fenced: When true, wrap the JSON in a Markdown ```json fence;
            otherwise emit bare JSON.

    Returns:
        A callable taking a prompt and returning a fresh ``Mock`` whose
        ``content`` attribute holds the serialised payload.
    """
    body = json.dumps(payload, indent=2)
    content = f"```json\n{body}\n```" if fenced else body

    def agent(prompt):
        response = Mock()
        response.content = content
        return response

    return agent


def _download_close(ticker, start, end, as_of):
    """Download daily data and return the close series up to *as_of*.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start: Start date string of the download window.
        end: End date string of the download window.
        as_of: Last date (inclusive) to keep in the returned series.

    Returns:
        A ``pd.Series`` of closing prices, or ``None`` when the download
        produced no data.
    """
    frame = yf.download(ticker, start=start, end=end, progress=False)
    if frame is None or frame.empty:
        return None

    closes = frame.loc[:pd.Timestamp(as_of)]['Close']
    if isinstance(closes, pd.DataFrame):
        # 'Close' can come back as a one-column frame; take that column
        # explicitly. (squeeze() would collapse a 1x1 frame to a scalar.)
        closes = closes.iloc[:, 0]
    return closes


class IgnitionTests:
    """
    Phase 7: Ignition Tests

    Drives the real IntegratedTradingWorkflow and RegimeDetector; only the
    LLM agents are replaced by canned responses. Each ``test_*`` method
    prints a report and returns True on pass, False on fail.
    """

    def __init__(self):
        """Initialize test harness."""
        self.config = {
            "anonymizer_seed": "ignition_test",
            "use_nli_model": False,  # Use fallback for speed
            "max_json_retries": 2,
            "fact_check_latency_budget": 2.0,
            "portfolio_value": 100000,
            "risk_config": {
                "max_position_risk": 0.02,
                "max_portfolio_heat": 0.10,
                "circuit_breaker": 0.15,
            },
        }

        self.workflow = IntegratedTradingWorkflow(self.config)
        self.regime_detector = RegimeDetector()

    def test_1_hallucination_trap(self):
        """
        TEST 1: HALLUCINATION TRAP

        Inject: "Apple revenue grew 500% last quarter"
        Ground Truth: Revenue grew 8%
        Expected: FACT_CHECK_FAILURE

        Returns:
            True when the decision reports ``fact_check_passed`` as falsy.
        """
        _banner("TEST 1: HALLUCINATION TRAP")
        print("\n🎯 Objective: Prove fact checker rejects obvious hallucination")
        print(" Injection: 'Apple revenue grew 500% last quarter'")
        print(" Ground Truth: Revenue grew 8%")
        print(" Expected: 🚫 REJECTED - FACT_CHECK_FAILURE\n")

        # All three agents answer with bare JSON (no markdown fence); only
        # the bull researcher carries the injected lie.
        llm_agents = {
            "market_analyst": _mock_agent({
                "analyst_type": "market",
                "key_findings": [
                    "Strong momentum",
                    "Volume increasing",
                    "Breakout pattern",
                ],
                "signal": "BUY",
                "confidence": 0.75,
                "reasoning": "Technical setup looks bullish with strong volume confirmation and breakout above resistance.",
            }),
            # HALLUCINATION INJECTED: well-formed JSON, false content.
            "bull_researcher": _mock_agent({
                "researcher_type": "bull",
                "key_arguments": [
                    "Apple revenue grew 500% last quarter, signaling massive adoption",
                    "Earnings beat expectations significantly",
                ],
                "signal": "BUY",
                "confidence": 0.99,
                "supporting_evidence": ["Q4 earnings", "Market share gains"],
            }),
            "bear_researcher": _mock_agent({
                "researcher_type": "bear",
                "key_arguments": [
                    "Valuation stretched at current levels",
                    "Competition intensifying in key markets",
                ],
                "signal": "HOLD",
                "confidence": 0.60,
                "supporting_evidence": [
                    "P/E ratio elevated",
                    "Market dynamics shifting",
                ],
            }),
        }

        # Ground truth: Revenue actually grew 8%
        ground_truth = {
            "revenue_growth_yoy": 0.08,  # 8% growth
            "price_change_pct": 0.02,
        }

        # Synthetic random-walk prices; seeded so every run sees the same path.
        rng = np.random.default_rng(_SYNTHETIC_PRICE_SEED)
        dates = pd.date_range('2022-01-01', periods=100, freq='D')
        prices = pd.Series(
            150 + np.cumsum(rng.standard_normal(100) * 0.5), index=dates
        )

        market_data = {
            "price_series": prices,
            "close": 155.0,
            "atr": 2.5,
            "volume": 50000000,
            "indicators": {"RSI": 55, "MACD": 0.5},
        }

        # Execute workflow
        decision, metrics = self.workflow.execute_trade_decision(
            ticker="AAPL",
            trading_date="2022-01-15",
            market_data=market_data,
            ground_truth=ground_truth,
            llm_agents=llm_agents,
        )

        # Validate result
        print("\n📋 RESULT:")
        print(f" Decision: {decision.action.value}")
        print(f" Fact Check Passed: {decision.fact_check_passed}")
        print(f" Reasoning: {decision.reasoning}")

        if decision.fact_check_passed:
            print("\n❌ TEST 1 FAILED: Fact checker approved hallucination!")
            print(" This is a CRITICAL FAILURE - system validated a 500% lie")
            return False

        print("\n✅ TEST 1 PASSED: Fact checker rejected hallucination")
        print(f" Rejection: {decision.reasoning}")
        return True

    def test_2_falling_knife(self):
        """
        TEST 2: FALLING KNIFE

        Date: January 27, 2022 (NVDA crash)
        RSI: < 30 (oversold)
        Expected: Regime = BEAR/VOLATILE, Signal = HOLD (not BUY)

        Only the regime classification is asserted here.

        Returns:
            True when the detected regime value is "trending_down" or
            "volatile"; False otherwise or when data is unavailable.
        """
        _banner("TEST 2: FALLING KNIFE DETECTION")
        print("\n🎯 Objective: Prove system won't buy a falling knife")
        print(" Date: January 27, 2022 (NVDA -3.6% crash)")
        print(" RSI: < 30 (oversold)")
        print(" Expected: Regime = VOLATILE/BEAR, Signal = HOLD\n")

        print("📄 Downloading NVDA data for January 2022 (with 100-day warm-up buffer)...")

        # Window starts in Oct 2021 to give the indicators a warm-up buffer.
        close_series = _download_close(
            "NVDA", start="2021-10-01", end="2022-02-01", as_of="2022-01-27"
        )
        if close_series is None:
            print("❌ Failed to download data")
            return False
        if len(close_series) < 6:
            # The 5-day comparison below indexes iloc[-6].
            print(f"❌ Insufficient data: only {len(close_series)} rows")
            return False

        last_close = close_series.iloc[-1]
        close_5d_ago = close_series.iloc[-6]

        print(f" Data points: {len(close_series)}")
        print(f" Price on Jan 27: ${last_close:.2f}")
        print(f" Price 5 days ago: ${close_5d_ago:.2f}")
        print(f" 5-day change: {((last_close / close_5d_ago) - 1) * 100:.1f}%")

        # Detect regime
        print("\n🔬 Running regime detection...")
        regime, metrics = self.regime_detector.detect_regime(close_series, window=60)

        print("\n📊 REGIME DETECTION RESULT:")
        print(f" Regime: {regime.value.upper()}")
        print(f" Volatility: {metrics['volatility']:.1%}")
        print(f" Trend Strength (ADX): {metrics['trend_strength']:.1f}")
        print(f" Cumulative Return: {metrics['cumulative_return']:.1%}")
        print(f" Hurst Exponent: {metrics['hurst_exponent']:.2f}")

        # Check if regime is BEAR or VOLATILE
        is_dangerous = regime.value in ("trending_down", "volatile")

        if not is_dangerous:
            print(f"\n❌ TEST 2 FAILED: Regime classified as {regime.value.upper()}")
            print(" This is DANGEROUS - system might buy a falling knife")
            return False

        print(f"\n✅ TEST 2 PASSED: Regime correctly identified as {regime.value.upper()}")
        print(" System should NOT buy the dip in this regime")
        return True

    def test_3_live_round(self):
        """
        TEST 3: LIVE ROUND

        Date: March 15-18, 2022 (Relief rally)
        Action: Allow system to trade normally
        Expected: Successfully execute a BUY trade

        Returns:
            True when the decision is a BUY with positive quantity; False
            otherwise or when data is unavailable.
        """
        _banner("TEST 3: LIVE ROUND (TRADE EXECUTION)")
        print("\n🎯 Objective: Prove system can execute actual trade")
        print(" Date: March 15, 2022 (Relief rally)")
        print(" Expected: Successfully BUY a position\n")

        print("📄 Downloading AAPL data for March 2022 (with 100-day warm-up buffer)...")

        # Window starts in Nov 2021 to give the indicators a warm-up buffer.
        close_series = _download_close(
            "AAPL", start="2021-11-01", end="2022-03-20", as_of="2022-03-15"
        )
        if close_series is None:
            print("❌ Failed to download data")
            return False
        if len(close_series) < 14:
            # The ATR proxy below needs a full 14-row rolling window.
            print(f"❌ Insufficient data: only {len(close_series)} rows")
            return False

        print(f" Data points: {len(close_series)}")
        print(f" Price on Mar 15: ${close_series.iloc[-1]:.2f}")

        # Bullish mock agents; these responses are wrapped in ```json fences.
        llm_agents = {
            "market_analyst": _mock_agent({
                "analyst_type": "market",
                "key_findings": [
                    "Relief rally underway",
                    "Oversold bounce",
                    "Volume confirming",
                ],
                "signal": "BUY",
                "confidence": 0.70,
                "reasoning": "Technical bounce from oversold levels with volume.",
            }, fenced=True),
            "bull_researcher": _mock_agent({
                "researcher_type": "bull",
                "key_arguments": [
                    "Market finding support after selloff",
                    "Technical indicators showing reversal",
                ],
                "signal": "BUY",
                "confidence": 0.75,
                "supporting_evidence": ["RSI bounce", "Volume spike"],
            }, fenced=True),
            "bear_researcher": _mock_agent({
                "researcher_type": "bear",
                "key_arguments": [
                    "Rally may be short-lived",
                    "Macro headwinds persist",
                ],
                "signal": "HOLD",
                "confidence": 0.55,
                "supporting_evidence": ["Fed policy", "Inflation"],
            }, fenced=True),
        }

        # Ground truth
        returns = close_series.pct_change()
        ground_truth = {
            "revenue_growth_yoy": 0.05,
            "price_change_pct": float(returns.iloc[-1]),
        }

        # Market data; "atr" is a proxy: 14-row rolling std of closes * 1.5.
        market_data = {
            "price_series": close_series,
            "close": float(close_series.iloc[-1]),
            "atr": float(close_series.rolling(14).std().iloc[-1] * 1.5),
            "volume": 50000000,
            "indicators": {"RSI": 45, "MACD": 0.3},
        }

        # Execute workflow
        print("\n🚀 Executing trade decision...")
        decision, metrics = self.workflow.execute_trade_decision(
            ticker="AAPL",
            trading_date="2022-03-15",
            market_data=market_data,
            ground_truth=ground_truth,
            llm_agents=llm_agents,
        )

        # Validate result
        print("\n📋 RESULT:")
        print(f" Action: {decision.action.value}")
        print(f" Quantity: {decision.quantity}")
        print(f" Confidence: {decision.confidence:.2f}")
        print(f" Fact Check Passed: {decision.fact_check_passed}")
        print(f" Risk Gate Passed: {decision.risk_gate_passed}")

        bought = decision.action == SignalType.BUY and decision.quantity > 0
        if not bought:
            print("\n❌ TEST 3 FAILED: Could not execute trade")
            print(f" Reasoning: {decision.reasoning}")
            return False

        print("\n✅ TEST 3 PASSED: Successfully executed BUY trade")
        print(f" Quantity: {decision.quantity} shares")
        print(f" Stop Loss: ${decision.stop_loss:.2f}")
        print(f" Risk: {decision.risk_pct:.2%}")
        return True


def main():
    """Run all three ignition tests and print a summary.

    Returns:
        Process exit status: 0 when every test passed, 1 otherwise.
    """
    _banner("PHASE 7: IGNITION TESTS")
    print("\nProving the system works with real logic, not mocks.\n")

    tests = IgnitionTests()

    # Run all three tests (every test runs even if an earlier one fails).
    results = {
        "test_1_hallucination": tests.test_1_hallucination_trap(),
        "test_2_falling_knife": tests.test_2_falling_knife(),
        "test_3_live_round": tests.test_3_live_round(),
    }

    # Summary
    _banner("IGNITION TEST SUMMARY")
    for test_name, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{test_name}: {status}")

    all_passed = all(results.values())

    print("\n" + "=" * _BANNER_WIDTH)
    if all_passed:
        print("✅ ALL IGNITION TESTS PASSED")
        print(" System is ready for live trading")
    else:
        print("❌ IGNITION TESTS FAILED")
        print(" System is NOT ready for production")
    print("=" * _BANNER_WIDTH)

    return 0 if all_passed else 1


# Run ignition tests
if __name__ == "__main__":
    # Non-zero exit status lets shells and CI detect a failed run.
    sys.exit(main())