diff --git a/tests/conftest.py b/tests/conftest.py index 3feb42a9..de8f2751 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -383,3 +383,271 @@ def openrouter_config(): "backend_url": "https://openrouter.ai/api/v1", }) return config + + +# ============================================================================ +# Agent Output Validation Fixtures (Issue #53) +# ============================================================================ + +@pytest.fixture +def sample_agent_state(): + """ + Create a complete sample agent state for testing. + + Provides a fully populated agent state with all required fields + including reports, debate states, and final decision. + + Scope: function (default) + + Returns: + dict: Complete agent state with all fields populated + + Example: + def test_complete_state(sample_agent_state): + assert sample_agent_state["company_of_interest"] == "AAPL" + assert "market_report" in sample_agent_state + """ + return { + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + "market_report": """ +# Market Analysis for AAPL + +## Technical Indicators +Strong bullish momentum with RSI at 55 and MACD showing positive divergence. +Price has broken through key resistance at $175. + +## Volume Analysis +Above-average volume on recent upward moves indicates strong buyer interest. +Institutional accumulation pattern observed over the past 2 weeks. + +## Price Action +Clear higher highs and higher lows pattern establishing uptrend. +Support level established at $170 with strong buying pressure. + """ + "Additional detailed analysis. " * 30, + "sentiment_report": """ +# Social Media Sentiment Analysis + +## Overall Sentiment +Strongly positive sentiment across major platforms (Twitter, Reddit, StockTwits). +Sentiment score: 8.5/10 based on 10,000+ analyzed posts. 
+ +## Key Themes +- New product launch excitement +- Strong quarterly earnings anticipation +- Innovation leadership recognition + +## Influencer Activity +Major tech influencers bullish on near-term prospects. + """ + "More sentiment details. " * 30, + "news_report": """ +# News Analysis + +## Recent Headlines +- Major product announcement driving positive coverage +- Analyst upgrades from 3 top firms this week +- Partnership announcements in AI space + +## Coverage Tone +85% positive, 10% neutral, 5% negative across 50 major news sources. + +## Impact Assessment +News flow strongly supportive of bullish thesis. + """ + "Additional news analysis. " * 30, + "fundamentals_report": """ +# Fundamental Analysis + +## Financial Metrics +| Metric | Value | Industry Avg | +|--------|-------|--------------| +| P/E | 28 | 25 | +| ROE | 45% | 20% | +| Revenue Growth | 12% | 8% | + +## Balance Sheet +Strong cash position of $150B, low debt-to-equity ratio. + +## Earnings Quality +Consistent earnings growth with strong margins. + """ + "Detailed fundamental analysis. 
" * 30, + "investment_debate_state": { + "history": "Round 1: Bull presents case for strong buy...\nRound 2: Bear raises concerns about valuation...\nRound 3: Bull counters with growth prospects...", + "count": 3, + "judge_decision": "BUY: Bulls made a compelling case with strong fundamentals and positive momentum", + "bull_history": "Strong fundamentals, positive momentum, innovation leadership", + "bear_history": "Slight valuation concerns, market volatility risk", + "current_response": "Final recommendation is BUY", + }, + "risk_debate_state": { + "history": "Round 1: Risk assessment begins...\nRound 2: Conservative view presented...", + "count": 2, + "judge_decision": "BUY: Risk is acceptable given strong fundamentals", + "risky_history": "High potential upside justifies position", + "safe_history": "Proceed with caution, good fundamentals", + "neutral_history": "Balanced risk-reward at current levels", + "latest_speaker": "neutral", + "current_risky_response": "Strong buy", + "current_safe_response": "Moderate buy", + "current_neutral_response": "Buy with standard position sizing", + }, + "final_trade_decision": "BUY: Strong consensus across all analysis teams. Fundamentals solid, technicals bullish, sentiment positive. Entry at current levels recommended with standard position sizing.", + "investment_plan": "Initiate position with 2% portfolio allocation", + "trader_investment_plan": "Execute market order for calculated position size", + "sender": "trader", + } + + +@pytest.fixture +def sample_agent_state_buy(sample_agent_state): + """ + Sample agent state with BUY decision. + + Returns complete state configured for BUY scenario. + + Scope: function (default) + + Example: + def test_buy_scenario(sample_agent_state_buy): + assert "BUY" in sample_agent_state_buy["final_trade_decision"] + """ + return sample_agent_state + + +@pytest.fixture +def sample_agent_state_sell(): + """ + Sample agent state with SELL decision. 
+ + Provides a complete state where all analyses point to SELL. + + Scope: function (default) + + Returns: + dict: Agent state with SELL decision + + Example: + def test_sell_scenario(sample_agent_state_sell): + assert "SELL" in sample_agent_state_sell["final_trade_decision"] + """ + return { + "company_of_interest": "TSLA", + "trade_date": "2024-01-20", + "market_report": "# Market Analysis\n\nBearish technical pattern with breakdown below support. " + "Detailed analysis. " * 50, + "sentiment_report": "# Sentiment Analysis\n\nNegative sentiment prevailing across platforms. " + "More details. " * 50, + "news_report": "# News Report\n\nMultiple negative headlines and analyst downgrades. " + "Additional coverage. " * 50, + "fundamentals_report": "# Fundamentals\n\nDeteriorating metrics and earnings concerns. " + "Financial details. " * 50, + "investment_debate_state": { + "history": "Round 1: Bear presents strong sell case...\nRound 2: Bull unable to counter effectively...", + "count": 2, + "judge_decision": "SELL: Bears made compelling case with fundamental concerns", + "bull_history": "Limited upside potential", + "bear_history": "Strong downside risk, overvalued", + }, + "risk_debate_state": { + "history": "Round 1: Risk analysis shows high downside...", + "count": 1, + "judge_decision": "SELL: Exit position to preserve capital", + "risky_history": "Too risky, exit recommended", + "safe_history": "Definitely sell", + "neutral_history": "Sell is prudent", + }, + "final_trade_decision": "SELL: Consensus to exit position. Fundamentals weak, technicals bearish, sentiment negative.", + } + + +@pytest.fixture +def sample_agent_state_hold(): + """ + Sample agent state with HOLD decision. + + Provides a complete state where analyses are mixed, leading to HOLD. 
+ + Scope: function (default) + + Returns: + dict: Agent state with HOLD decision + + Example: + def test_hold_scenario(sample_agent_state_hold): + assert "HOLD" in sample_agent_state_hold["final_trade_decision"] + """ + return { + "company_of_interest": "GOOGL", + "trade_date": "2024-01-22", + "market_report": "# Market Analysis\n\nMixed signals with consolidation pattern. " + "Technical details. " * 50, + "sentiment_report": "# Sentiment Analysis\n\nNeutral sentiment, market awaiting catalyst. " + "Sentiment data. " * 50, + "news_report": "# News Report\n\nBalanced news flow, no major catalysts. " + "News details. " * 50, + "fundamentals_report": "# Fundamentals\n\nSolid but not compelling, fairly valued. " + "Financial data. " * 50, + "investment_debate_state": { + "history": "Round 1: Bull and Bear present balanced views...\nRound 2: No clear winner...\nRound 3: Continued debate...", + "count": 3, + "judge_decision": "HOLD: Insufficient conviction either way, maintain position", + "bull_history": "Some positives but not strong", + "bear_history": "Some concerns but not severe", + }, + "risk_debate_state": { + "history": "Round 1: Risk assessment shows balanced profile...", + "count": 1, + "judge_decision": "HOLD: Risk-reward balanced, no action needed", + "risky_history": "Could go either way", + "safe_history": "Wait for clarity", + "neutral_history": "Hold is appropriate", + }, + "final_trade_decision": "HOLD: Mixed signals across analysis teams. Await further clarity before making move.", + } + + +@pytest.fixture +def sample_invest_debate(): + """ + Sample investment debate state. + + Provides a complete investment debate state for isolated testing. 
+ + Scope: function (default) + + Returns: + dict: Investment debate state (InvestDebateState) + + Example: + def test_debate(sample_invest_debate): + assert sample_invest_debate["count"] > 0 + """ + return { + "history": "Round 1: Bull argues for strong buy based on fundamentals...\nRound 2: Bear raises valuation concerns...\nRound 3: Bull counters with growth prospects...", + "count": 3, + "judge_decision": "BUY: Bulls presented stronger evidence", + "bull_history": "Strong fundamentals, positive technicals, good sentiment", + "bear_history": "Valuation slightly stretched, some market risk", + "current_response": "Recommend BUY with conviction", + } + + +@pytest.fixture +def sample_risk_debate(): + """ + Sample risk debate state. + + Provides a complete risk debate state for isolated testing. + + Scope: function (default) + + Returns: + dict: Risk debate state (RiskDebateState) + + Example: + def test_risk_debate(sample_risk_debate): + assert sample_risk_debate["count"] > 0 + """ + return { + "history": "Round 1: Risk analysts evaluate position sizing...\nRound 2: Discussion on risk parameters...", + "count": 2, + "judge_decision": "BUY: Risk acceptable with standard position size", + "risky_history": "Aggressive position justified by strong signals", + "safe_history": "Conservative position appropriate given uncertainty", + "neutral_history": "Standard position sizing recommended", + "latest_speaker": "neutral", + "current_risky_response": "Take larger position", + "current_safe_response": "Take smaller position", + "current_neutral_response": "Standard position is balanced", + } diff --git a/tests/e2e/test_uat_agent_outputs.py b/tests/e2e/test_uat_agent_outputs.py new file mode 100644 index 00000000..3028e867 --- /dev/null +++ b/tests/e2e/test_uat_agent_outputs.py @@ -0,0 +1,495 @@ +""" +UAT (User Acceptance Testing) for Agent Output Quality. + +This module provides end-to-end tests for complete agent workflows: +1. 
Complete analysis workflow (BUY/SELL/HOLD scenarios) +2. Edge case handling (missing data, conflicting reports) +3. Content quality validation (length, structure, clarity) +4. State integrity checks (field presence, debate coherence) + +All tests use mocked data to avoid real API calls. +""" + +import pytest +from typing import Dict, Any + +from tradingagents.utils.output_validator import ( + validate_agent_state, + validate_decision_quality, + validate_debate_state, + validate_report_completeness, +) + +pytestmark = pytest.mark.e2e + + +# ============================================================================ +# Test Complete Analysis Workflow +# ============================================================================ + +class TestCompleteAnalysisWorkflow: + """Test complete agent analysis workflow for different trading scenarios.""" + + def test_buy_scenario_complete_workflow(self, sample_agent_state_buy): + """ + Test complete BUY scenario workflow. + + Validates: + - All reports generated + - Investment debate concludes with BUY + - Risk debate validates decision + - Final decision is BUY with reasoning + """ + state = sample_agent_state_buy + + # Validate complete state + result = validate_agent_state(state) + + assert result.is_valid is True + assert result.metrics["company_of_interest"] == "AAPL" + assert result.metrics["reports_present"] == 4 + assert result.metrics["final_signal"] == "BUY" + assert result.metrics["investment_debate_valid"] is True + assert result.metrics["risk_debate_valid"] is True + + def test_sell_scenario_complete_workflow(self, sample_agent_state_sell): + """ + Test complete SELL scenario workflow. 
+ + Validates: + - All reports generated + - Investment debate concludes with SELL + - Risk debate validates decision + - Final decision is SELL with reasoning + """ + state = sample_agent_state_sell + + result = validate_agent_state(state) + + assert result.is_valid is True + assert result.metrics["final_signal"] == "SELL" + assert result.metrics["reports_present"] == 4 + + def test_hold_scenario_complete_workflow(self, sample_agent_state_hold): + """ + Test complete HOLD scenario workflow. + + Validates: + - All reports generated + - Investment debate is inconclusive or balanced + - Risk debate recommends caution + - Final decision is HOLD with reasoning + """ + state = sample_agent_state_hold + + result = validate_agent_state(state) + + assert result.is_valid is True + assert result.metrics["final_signal"] == "HOLD" + + def test_workflow_preserves_debate_history(self, sample_agent_state_buy): + """Test that debate history is preserved throughout workflow.""" + state = sample_agent_state_buy + + invest_debate = state["investment_debate_state"] + risk_debate = state["risk_debate_state"] + + # Validate both debates have history + invest_result = validate_debate_state(invest_debate, debate_type="invest") + risk_result = validate_debate_state(risk_debate, debate_type="risk") + + assert invest_result.metrics["history_length"] > 0 + assert risk_result.metrics["history_length"] > 0 + assert invest_result.metrics["count"] > 0 + assert risk_result.metrics["count"] > 0 + + def test_workflow_all_reports_meet_quality_standards(self, sample_agent_state_buy): + """Test that all generated reports meet quality standards.""" + state = sample_agent_state_buy + + reports = [ + state["market_report"], + state["sentiment_report"], + state["news_report"], + state["fundamentals_report"], + ] + + for report in reports: + result = validate_report_completeness( + report, + min_length=500, + require_markdown_tables=False, + require_sections=False, + ) + assert result.is_valid is True + 
assert result.metrics["length"] >= 500 + + +# ============================================================================ +# Test Edge Case Scenarios +# ============================================================================ + +class TestEdgeCaseScenarios: + """Test handling of edge cases and unusual scenarios.""" + + def test_missing_single_report_graceful_degradation(self): + """Test that workflow continues with one missing report.""" + state = { + "company_of_interest": "TSLA", + "trade_date": "2024-01-20", + "market_report": "Market analysis. " * 100, + "sentiment_report": "Sentiment analysis. " * 100, + "news_report": "News analysis. " * 100, + # Missing fundamentals_report + "investment_debate_state": { + "history": "Debate based on available data", + "count": 3, + "judge_decision": "HOLD: Incomplete data, proceeding cautiously", + }, + "risk_debate_state": { + "history": "Risk assessment", + "count": 2, + "judge_decision": "HOLD: Missing fundamentals increases uncertainty", + }, + "final_trade_decision": "HOLD: Awaiting fundamental data", + } + + result = validate_agent_state(state) + + # Should still be valid but with warnings + assert result.is_valid is True + assert result.metrics["reports_present"] == 3 + assert len(result.warnings) > 0 + + def test_conflicting_debate_conclusions_warning(self): + """Test warning when investment and risk debates conflict.""" + state = { + "company_of_interest": "GOOGL", + "trade_date": "2024-01-22", + "market_report": "Report. " * 100, + "sentiment_report": "Report. " * 100, + "news_report": "Report. " * 100, + "fundamentals_report": "Report. 
" * 100, + "investment_debate_state": { + "history": "Bullish debate", + "count": 2, + "judge_decision": "BUY: Strong upside potential", + }, + "risk_debate_state": { + "history": "Risk concerns", + "count": 2, + "judge_decision": "SELL: Risk too high", # Conflicts with invest + }, + "final_trade_decision": "HOLD: Conflicting signals from teams", + } + + result = validate_agent_state(state) + + assert result.is_valid is True + # Different signals detected + assert result.metrics.get("final_signal") == "HOLD" + + def test_empty_debate_history_but_valid_decision(self): + """Test handling of empty debate history with valid decision.""" + state = { + "company_of_interest": "MSFT", + "trade_date": "2024-01-25", + "market_report": "Report. " * 100, + "investment_debate_state": { + "history": "", # Empty history + "count": 0, + "judge_decision": "HOLD: Insufficient deliberation", + }, + "final_trade_decision": "HOLD: More analysis needed", + } + + result = validate_agent_state(state) + + assert result.is_valid is True + assert len(result.warnings) > 0 # Should warn about empty history + + def test_very_long_debate_convergence_issue(self): + """Test detection of debates that went too long.""" + state = { + "company_of_interest": "NVDA", + "trade_date": "2024-01-28", + "market_report": "Report. 
" * 100, + "investment_debate_state": { + "history": "Round 1...\nRound 2...\n" * 15, + "count": 15, # Very high count + "judge_decision": "BUY: Finally reached consensus", + }, + "final_trade_decision": "BUY: After extensive deliberation", + } + + result = validate_agent_state(state) + + assert result.is_valid is True + # Should have warnings about high debate count + invest_debate_result = validate_debate_state( + state["investment_debate_state"], + debate_type="invest" + ) + assert len(invest_debate_result.warnings) > 0 + + def test_malformed_but_extractable_decision(self): + """Test extraction of signal from poorly formatted decision.""" + decisions = [ + "i think we should BUY this stock", + "recommendation: buy", + "buy!!!", + "Final call is to buy the position", + ] + + for decision in decisions: + result = validate_decision_quality(decision) + assert result.metrics["signal"] == "BUY" + + def test_missing_all_debate_states(self): + """Test handling when no debates occurred.""" + state = { + "company_of_interest": "META", + "trade_date": "2024-02-01", + "market_report": "Report. 
" * 100, + # No debate states + "final_trade_decision": "HOLD: No consensus reached", + } + + result = validate_agent_state(state) + + assert result.is_valid is True + assert len(result.warnings) > 0 + assert any("incomplete" in w.lower() for w in result.warnings) + + +# ============================================================================ +# Test Content Quality +# ============================================================================ + +class TestContentQuality: + """Test content quality validation across all outputs.""" + + def test_report_minimum_length_enforcement(self): + """Test that all reports meet minimum length requirements.""" + short_reports = [ + "Too short", + "Also short", + "Brief", + ] + + for report in short_reports: + result = validate_report_completeness(report, min_length=500) + assert result.is_valid is False + + def test_report_markdown_structure_quality(self): + """Test that well-structured reports are recognized.""" + well_structured_report = """ + # Market Analysis for AAPL + + ## Executive Summary + Strong buy signal based on comprehensive analysis. + + ## Technical Indicators + | Indicator | Value | Signal | + |-----------|-------|--------| + | RSI | 45 | Neutral| + | MACD | +2.3 | Buy | + + ## Fundamental Analysis + - Revenue growth: 15% YoY + - P/E ratio: 25 (reasonable for tech) + - Strong balance sheet + + ## Conclusion + """ + "Detailed conclusion. 
" * 50 + + result = validate_report_completeness( + well_structured_report, + min_length=500, + require_markdown_tables=True, + require_sections=True, + ) + + assert result.is_valid is True + assert result.metrics["markdown_tables"] > 0 + assert result.metrics["section_headers"] >= 3 + assert result.metrics["has_bullet_points"] is True + + def test_decision_clarity_with_reasoning(self): + """Test that clear decisions with reasoning are validated.""" + clear_decisions = [ + "BUY: Strong fundamentals (P/E 20), positive momentum (RSI 55), bullish sentiment", + "SELL: Overvalued at current P/E of 45, declining revenue, negative news", + "HOLD: Mixed signals - good fundamentals but uncertain market conditions", + ] + + for decision in clear_decisions: + result = validate_decision_quality(decision) + assert result.is_valid is True + assert result.metrics["has_reasoning"] is True + assert len(result.warnings) == 0 # Clear decisions shouldn't warn + + def test_decision_ambiguity_detection(self): + """Test detection of ambiguous decisions.""" + ambiguous_decisions = [ + "BUY or SELL, not sure", + "Maybe HOLD, could be BUY", + "SELL but also considering BUY", + ] + + for decision in ambiguous_decisions: + result = validate_decision_quality(decision) + # Should still extract first signal + assert result.metrics["signal"] is not None + # But should warn about ambiguity + assert len(result.warnings) > 0 + + def test_report_content_variety_indicators(self): + """Test that reports with varied content structure are recognized.""" + varied_report = """ + # Comprehensive Analysis + + ## Overview + Multiple content types present. + + ## Data Table + | Metric | Q1 | Q2 | Q3 | Q4 | + |--------|----|----|----|----| + | Revenue| 10M| 12M| 15M| 18M| + + ## Key Points + - Point 1 + - Point 2 + * Point 3 + + ## Details + """ + "Additional detailed analysis. 
" * 50 + + result = validate_report_completeness(varied_report, min_length=500) + + assert result.is_valid is True + assert result.metrics["markdown_tables"] > 0 + assert result.metrics["section_headers"] > 0 + assert result.metrics["has_bullet_points"] is True + # No warnings about lacking structure + assert not any("structured" in w.lower() for w in result.warnings) + + +# ============================================================================ +# Test State Integrity +# ============================================================================ + +class TestStateIntegrity: + """Test integrity and consistency of agent state.""" + + def test_all_required_fields_present(self, sample_agent_state_buy): + """Test that all required fields are present in state.""" + state = sample_agent_state_buy + + required_fields = [ + "company_of_interest", + "trade_date", + "market_report", + "sentiment_report", + "news_report", + "fundamentals_report", + "investment_debate_state", + "risk_debate_state", + "final_trade_decision", + ] + + for field in required_fields: + assert field in state, f"Missing required field: {field}" + + def test_debate_state_internal_consistency(self, sample_invest_debate): + """Test internal consistency of debate state.""" + debate = sample_invest_debate + + result = validate_debate_state(debate, debate_type="invest") + + assert result.is_valid is True + # Count should match history length (approximately) + assert result.metrics["count"] > 0 + assert result.metrics["history_length"] > 0 + + def test_final_decision_aligns_with_debates(self, sample_agent_state_buy): + """Test that final decision aligns with debate conclusions.""" + state = sample_agent_state_buy + + invest_debate = state["investment_debate_state"] + risk_debate = state["risk_debate_state"] + final_decision = state["final_trade_decision"] + + # Extract all signals + invest_result = validate_debate_state(invest_debate, debate_type="invest") + risk_result = 
validate_debate_state(risk_debate, debate_type="risk") + final_result = validate_decision_quality(final_decision) + + # All should be BUY for this scenario + assert invest_result.metrics.get("judge_signal") == "BUY" + assert risk_result.metrics.get("judge_signal") in ["BUY", "HOLD"] + assert final_result.metrics["signal"] == "BUY" + + def test_state_preserves_company_context(self, sample_agent_state_buy): + """Test that company context is preserved throughout state.""" + state = sample_agent_state_buy + + company = state["company_of_interest"] + trade_date = state["trade_date"] + + # Verify basic context + assert isinstance(company, str) + assert len(company) > 0 + assert isinstance(trade_date, str) + assert len(trade_date) > 0 + + def test_debate_history_chronological_consistency(self, sample_invest_debate): + """Test that debate history appears chronologically consistent.""" + debate = sample_invest_debate + + history = debate["history"] + count = debate["count"] + + # History should exist if count > 0 + if count > 0: + assert len(history) > 0 + + # If multiple rounds, history should reflect that + if count >= 2: + # Should have multiple segments or rounds + assert len(history) > 50 # Reasonable minimum for 2+ rounds + + def test_type_consistency_across_state(self, sample_agent_state_buy): + """Test that all fields have correct types.""" + state = sample_agent_state_buy + + # String fields + string_fields = [ + "company_of_interest", + "trade_date", + "market_report", + "sentiment_report", + "news_report", + "fundamentals_report", + "final_trade_decision", + ] + + for field in string_fields: + if field in state: + assert isinstance(state[field], str), f"{field} should be string" + + # Dict fields + dict_fields = ["investment_debate_state", "risk_debate_state"] + + for field in dict_fields: + if field in state: + assert isinstance(state[field], dict), f"{field} should be dict" + + def test_empty_state_detection(self): + """Test detection of completely empty 
state.""" + empty_state = {} + + result = validate_agent_state(empty_state) + + assert result.is_valid is False + assert len(result.errors) >= 2 # At least missing company and date diff --git a/tests/unit/test_output_validators.py b/tests/unit/test_output_validators.py new file mode 100644 index 00000000..925a0769 --- /dev/null +++ b/tests/unit/test_output_validators.py @@ -0,0 +1,700 @@ +""" +Test suite for Output Validation Utilities. + +This module tests: +1. ValidationResult dataclass behavior +2. Report completeness validation (length, markdown, sections) +3. Decision quality validation (signal extraction, reasoning) +4. Debate state validation (history, count, judge_decision) +5. Complete agent state validation (orchestration) + +All tests use mocked data (no real API calls). +""" + +import pytest +from typing import Dict, Any + +from tradingagents.utils.output_validator import ( + ValidationResult, + validate_report_completeness, + validate_decision_quality, + validate_debate_state, + validate_agent_state, +) + +pytestmark = pytest.mark.unit + + +# ============================================================================ +# Test ValidationResult Dataclass +# ============================================================================ + +class TestValidationResult: + """Test ValidationResult dataclass behavior.""" + + def test_default_valid_result(self): + """Test ValidationResult defaults to valid with empty lists.""" + result = ValidationResult(is_valid=True) + + assert result.is_valid is True + assert result.errors == [] + assert result.warnings == [] + assert result.metrics == {} + + def test_add_error_marks_invalid(self): + """Test that add_error() marks result as invalid.""" + result = ValidationResult(is_valid=True) + result.add_error("Something went wrong") + + assert result.is_valid is False + assert len(result.errors) == 1 + assert result.errors[0] == "Something went wrong" + + def test_add_warning_keeps_valid(self): + """Test that add_warning() 
doesn't change validity.""" + result = ValidationResult(is_valid=True) + result.add_warning("This could be better") + + assert result.is_valid is True + assert len(result.warnings) == 1 + assert result.warnings[0] == "This could be better" + + def test_add_metric(self): + """Test that add_metric() stores key-value pairs.""" + result = ValidationResult(is_valid=True) + result.add_metric("length", 500) + result.add_metric("signal", "BUY") + + assert result.metrics["length"] == 500 + assert result.metrics["signal"] == "BUY" + + def test_multiple_errors_and_warnings(self): + """Test accumulating multiple errors and warnings.""" + result = ValidationResult(is_valid=True) + result.add_error("Error 1") + result.add_error("Error 2") + result.add_warning("Warning 1") + result.add_warning("Warning 2") + + assert result.is_valid is False + assert len(result.errors) == 2 + assert len(result.warnings) == 2 + + +# ============================================================================ +# Test Report Validation +# ============================================================================ + +class TestReportValidation: + """Test validate_report_completeness() function.""" + + def test_valid_report_passes(self): + """Test that a valid report passes validation.""" + report = "# Market Analysis\n\n" + "This is a comprehensive report. 
" * 50 + + result = validate_report_completeness(report, min_length=500) + + assert result.is_valid is True + assert len(result.errors) == 0 + assert result.metrics["length"] > 500 + + def test_none_report_fails(self): + """Test that None report fails validation.""" + result = validate_report_completeness(None) + + assert result.is_valid is False + assert "None" in result.errors[0] + + def test_empty_report_fails(self): + """Test that empty report fails validation.""" + result = validate_report_completeness("") + + assert result.is_valid is False + assert "empty" in result.errors[0].lower() + + def test_short_report_fails(self): + """Test that report below min_length fails.""" + short_report = "Too short" + + result = validate_report_completeness(short_report, min_length=500) + + assert result.is_valid is False + assert any("minimum" in err.lower() for err in result.errors) + assert result.metrics["length"] < 500 + + def test_wrong_type_fails(self): + """Test that non-string report fails validation.""" + result = validate_report_completeness(123) + + assert result.is_valid is False + assert "string" in result.errors[0].lower() + + def test_markdown_table_detection(self): + """Test detection of markdown tables.""" + report_with_table = """ + # Analysis + + | Metric | Value | + |--------|-------| + | Price | $100 | + | Volume | 1M | + """ + "Additional text. " * 50 + + result = validate_report_completeness( + report_with_table, + min_length=200, + require_markdown_tables=True + ) + + assert result.is_valid is True + assert result.metrics["markdown_tables"] > 0 + + def test_missing_markdown_table_fails_when_required(self): + """Test that missing markdown tables fails when required.""" + report = "# Analysis\n\n" + "No tables here. 
" * 50 + + result = validate_report_completeness( + report, + min_length=200, + require_markdown_tables=True + ) + + assert result.is_valid is False + assert any("table" in err.lower() for err in result.errors) + + def test_section_header_detection(self): + """Test detection of section headers.""" + report_with_headers = """ + # Main Title + ## Subsection + ### Details + + Content here. + """ + "More content. " * 50 + + result = validate_report_completeness( + report_with_headers, + min_length=200, + require_sections=True + ) + + assert result.is_valid is True + assert result.metrics["section_headers"] >= 3 + + def test_missing_sections_fails_when_required(self): + """Test that missing sections fails when required.""" + report = "Just plain text. " * 50 + + result = validate_report_completeness( + report, + min_length=200, + require_sections=True + ) + + assert result.is_valid is False + assert any("section" in err.lower() for err in result.errors) + + def test_short_report_warning(self): + """Test warning for relatively short reports.""" + # Report is above min but below 1.5x min + report = "Short but valid. " * 40 # ~680 chars + + result = validate_report_completeness(report, min_length=500) + + assert result.is_valid is True + assert len(result.warnings) > 0 + assert any("short" in warn.lower() for warn in result.warnings) + + def test_bullet_point_detection(self): + """Test detection of bullet points.""" + report_with_bullets = """ + # Analysis + + - Point 1 + - Point 2 + * Point 3 + + """ + "Additional content. " * 50 + + result = validate_report_completeness(report_with_bullets, min_length=200) + + assert result.metrics["has_bullet_points"] is True + + def test_unstructured_content_warning(self): + """Test warning for content lacking structure.""" + unstructured_report = "Just a long stream of text without any structure. 
# ============================================================================
# Test Decision Validation
# ============================================================================

class TestDecisionValidation:
    """Tests for validate_decision_quality()."""

    def test_valid_buy_decision(self):
        """A BUY decision with reasoning passes and extracts BUY."""
        verdict = "BUY: Strong fundamentals and positive momentum"

        res = validate_decision_quality(verdict)

        assert res.is_valid is True
        assert res.metrics["signal"] == "BUY"
        assert res.metrics["has_reasoning"] is True

    def test_valid_sell_decision(self):
        """A SELL decision with reasoning passes and extracts SELL."""
        verdict = "SELL: Overvalued with deteriorating fundamentals"

        res = validate_decision_quality(verdict)

        assert res.is_valid is True
        assert res.metrics["signal"] == "SELL"

    def test_valid_hold_decision(self):
        """A HOLD decision with reasoning passes and extracts HOLD."""
        verdict = "HOLD: Mixed signals, awaiting clarity"

        res = validate_decision_quality(verdict)

        assert res.is_valid is True
        assert res.metrics["signal"] == "HOLD"

    def test_case_insensitive_signal_extraction(self):
        """Signal extraction ignores letter case."""
        variants = [
            "buy the stock",
            "BUY the stock",
            "Buy the stock",
            "We should buy",
        ]

        for variant in variants:
            res = validate_decision_quality(variant)
            assert res.metrics["signal"] == "BUY"

    def test_none_decision_fails(self):
        """A None decision is rejected."""
        res = validate_decision_quality(None)

        assert res.is_valid is False
        assert "None" in res.errors[0]

    def test_empty_decision_fails(self):
        """An empty decision is rejected."""
        res = validate_decision_quality("")

        assert res.is_valid is False
        assert "empty" in res.errors[0].lower()

    def test_no_signal_fails(self):
        """Text with no BUY/SELL/HOLD fails and yields signal=None."""
        verdict = "This is a decision without a clear signal"

        res = validate_decision_quality(verdict)

        assert res.is_valid is False
        assert any("signal" in err.lower() for err in res.errors)
        assert res.metrics["signal"] is None

    def test_wrong_type_fails(self):
        """A non-string decision is rejected."""
        res = validate_decision_quality({"decision": "BUY"})

        assert res.is_valid is False
        assert "string" in res.errors[0].lower()

    def test_multiple_signals_warning(self):
        """Conflicting signals keep the first one but produce a warning."""
        verdict = "BUY or maybe SELL, hard to decide, could HOLD"

        res = validate_decision_quality(verdict)

        # First signal wins...
        assert res.metrics["signal"] == "BUY"
        # ...but the conflict is surfaced.
        assert len(res.warnings) > 0
        assert any("conflicting" in warn.lower() for warn in res.warnings)

    def test_short_decision_warning(self):
        """A bare signal with no rationale gets a 'short' warning."""
        verdict = "BUY"

        res = validate_decision_quality(verdict)

        assert res.is_valid is True
        assert len(res.warnings) > 0
        assert any("short" in warn.lower() for warn in res.warnings)

    def test_decision_with_reasoning_markers(self):
        """Colons, periods, and length all count as reasoning markers."""
        reasoned_verdicts = [
            "BUY: Strong fundamentals",
            "SELL. Company is overvalued.",
            "HOLD because market is uncertain",
        ]

        for verdict in reasoned_verdicts:
            res = validate_decision_quality(verdict)
            assert res.metrics["has_reasoning"] is True

    def test_signal_count_metric(self):
        """signal_count reflects every signal occurrence, any case."""
        verdict = "BUY BUY BUY! Strong signal to buy"

        res = validate_decision_quality(verdict)

        assert res.metrics["signal_count"] == 4
        assert res.metrics["signal"] == "BUY"
# ============================================================================
# Test Debate State Validation
# ============================================================================

class TestDebateStateValidation:
    """Tests for validate_debate_state()."""

    def test_valid_invest_debate_state(self):
        """A fully populated invest debate state passes."""
        state = {
            "history": "Round 1: Bull argues...\nRound 2: Bear counters...",
            "count": 2,
            "judge_decision": "BUY: Bulls made stronger case",
            "bull_history": "Bull argument",
            "bear_history": "Bear argument",
        }

        res = validate_debate_state(state, debate_type="invest")

        assert res.is_valid is True
        assert res.metrics["history_length"] > 0
        assert res.metrics["count"] == 2
        assert res.metrics["judge_signal"] == "BUY"

    def test_valid_risk_debate_state(self):
        """A fully populated risk debate state passes."""
        state = {
            "history": "Round 1: Risky argues...\nRound 2: Safe counters...",
            "count": 2,
            "judge_decision": "HOLD: Balanced risk profile",
            "risky_history": "Risky argument",
            "safe_history": "Safe argument",
            "neutral_history": "Neutral argument",
        }

        res = validate_debate_state(state, debate_type="risk")

        assert res.is_valid is True
        assert res.metrics["count"] == 2

    def test_none_debate_state_fails(self):
        """A None debate state is rejected."""
        res = validate_debate_state(None)

        assert res.is_valid is False
        assert "None" in res.errors[0]

    def test_wrong_type_fails(self):
        """A non-dict debate state is rejected."""
        res = validate_debate_state("not a dict")

        assert res.is_valid is False
        assert "dict" in res.errors[0].lower()

    def test_missing_required_fields_fails(self):
        """Missing required fields produce a 'missing' error."""
        partial_state = {
            "history": "Some history",
            # Missing count and judge_decision
        }

        res = validate_debate_state(partial_state)

        assert res.is_valid is False
        assert any("missing" in err.lower() for err in res.errors)

    def test_invalid_debate_type_fails(self):
        """An unrecognized debate type is rejected."""
        state = {
            "history": "History",
            "count": 1,
            "judge_decision": "BUY",
        }

        res = validate_debate_state(state, debate_type="unknown")

        assert res.is_valid is False
        assert "unknown" in res.errors[0].lower()

    def test_empty_history_warning(self):
        """An empty history string warns but does not fail."""
        state = {
            "history": "",
            "count": 0,
            "judge_decision": "HOLD",
        }

        res = validate_debate_state(state)

        assert res.is_valid is True
        assert any("empty" in warn.lower() for warn in res.warnings)

    def test_negative_count_fails(self):
        """A negative round count is rejected."""
        state = {
            "history": "History",
            "count": -1,
            "judge_decision": "BUY",
        }

        res = validate_debate_state(state)

        assert res.is_valid is False
        assert any("negative" in err.lower() for err in res.errors)

    def test_high_count_warning(self):
        """A very high round count warns about convergence."""
        state = {
            "history": "Long debate...",
            "count": 15,
            "judge_decision": "SELL",
        }

        res = validate_debate_state(state)

        assert res.is_valid is True
        assert any("high" in warn.lower() for warn in res.warnings)

    def test_invalid_judge_decision_warning(self):
        """A judge decision with no clear signal warns but does not fail."""
        state = {
            "history": "History",
            "count": 2,
            "judge_decision": "No clear signal here",
        }

        res = validate_debate_state(state)

        assert res.is_valid is True
        assert len(res.warnings) > 0

    def test_optional_fields_metric(self):
        """Optional per-side histories are counted in metrics."""
        state = {
            "history": "History",
            "count": 1,
            "judge_decision": "BUY",
            "bull_history": "Bull",
            "bear_history": "Bear",
        }

        res = validate_debate_state(state, debate_type="invest")

        assert res.metrics["optional_fields_present"] >= 2
{ + "history": "History", + "count": 1, + "judge_decision": "BUY", + "bull_history": "Bull", + "bear_history": "Bear", + } + + result = validate_debate_state(debate_state, debate_type="invest") + + assert result.metrics["optional_fields_present"] >= 2 + + def test_wrong_history_type_fails(self): + """Test that non-string history fails.""" + debate_state = { + "history": 123, + "count": 1, + "judge_decision": "BUY", + } + + result = validate_debate_state(debate_state) + + assert result.is_valid is False + assert any("string" in err.lower() for err in result.errors) + + def test_wrong_count_type_fails(self): + """Test that non-int count fails.""" + debate_state = { + "history": "History", + "count": "two", + "judge_decision": "BUY", + } + + result = validate_debate_state(debate_state) + + assert result.is_valid is False + assert any("int" in err.lower() for err in result.errors) + + +# ============================================================================ +# Test Agent State Validation +# ============================================================================ + +class TestAgentStateValidation: + """Test validate_agent_state() function.""" + + def test_valid_complete_agent_state(self): + """Test that complete valid agent state passes.""" + state = { + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + "market_report": "# Market Analysis\n\n" + "Detailed analysis. " * 100, + "sentiment_report": "# Sentiment Report\n\n" + "Social sentiment. " * 100, + "news_report": "# News Report\n\n" + "Latest news. " * 100, + "fundamentals_report": "# Fundamentals\n\n" + "Financial data. 
" * 100, + "investment_debate_state": { + "history": "Debate history", + "count": 3, + "judge_decision": "BUY: Strong case", + }, + "risk_debate_state": { + "history": "Risk debate", + "count": 2, + "judge_decision": "HOLD: Moderate risk", + }, + "final_trade_decision": "BUY: All signals align positively", + } + + result = validate_agent_state(state) + + assert result.is_valid is True + assert result.metrics["company_of_interest"] == "AAPL" + assert result.metrics["trade_date"] == "2024-01-15" + assert result.metrics["reports_present"] == 4 + assert result.metrics["final_signal"] == "BUY" + + def test_none_state_fails(self): + """Test that None state fails.""" + result = validate_agent_state(None) + + assert result.is_valid is False + assert "None" in result.errors[0] + + def test_wrong_type_fails(self): + """Test that non-dict state fails.""" + result = validate_agent_state("not a dict") + + assert result.is_valid is False + assert "dict" in result.errors[0].lower() + + def test_missing_company_fails(self): + """Test that missing company fails.""" + state = { + "trade_date": "2024-01-15", + } + + result = validate_agent_state(state) + + assert result.is_valid is False + assert any("company" in err.lower() for err in result.errors) + + def test_missing_trade_date_fails(self): + """Test that missing trade date fails.""" + state = { + "company_of_interest": "AAPL", + } + + result = validate_agent_state(state) + + assert result.is_valid is False + assert any("trade_date" in err.lower() for err in result.errors) + + def test_incomplete_reports_warning(self): + """Test warning when some reports are missing.""" + state = { + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + "market_report": "Market analysis. 
" * 100, + # Missing other reports + } + + result = validate_agent_state(state) + + # Basic fields present, so valid + assert result.is_valid is True + # But warn about missing reports + assert len(result.warnings) > 0 + assert result.metrics["reports_present"] < 4 + + def test_invalid_report_warning(self): + """Test warning for invalid report content.""" + state = { + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + "market_report": "Too short", # Below min length + } + + result = validate_agent_state(state) + + assert result.is_valid is True + assert any("market_report" in warn.lower() for warn in result.warnings) + + def test_invalid_invest_debate_warning(self): + """Test warning for invalid investment debate.""" + state = { + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + "investment_debate_state": { + # Missing required fields + "history": "History", + }, + } + + result = validate_agent_state(state) + + assert result.is_valid is True + assert any("investment debate" in warn.lower() for warn in result.warnings) + + def test_invalid_risk_debate_warning(self): + """Test warning for invalid risk debate.""" + state = { + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + "risk_debate_state": { + "count": -1, # Invalid + }, + } + + result = validate_agent_state(state) + + assert result.is_valid is True + assert any("risk debate" in warn.lower() for warn in result.warnings) + + def test_invalid_final_decision_warning(self): + """Test warning for invalid final decision.""" + state = { + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + "final_trade_decision": "No clear signal", + } + + result = validate_agent_state(state) + + assert result.is_valid is True + assert any("final decision" in warn.lower() for warn in result.warnings) + + def test_incomplete_state_warning(self): + """Test warning for very incomplete state.""" + state = { + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + # No debates or 
decision + } + + result = validate_agent_state(state) + + assert result.is_valid is True + assert any("incomplete" in warn.lower() for warn in result.warnings) + + def test_reports_count_metrics(self): + """Test that report counts are tracked.""" + state = { + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + "market_report": "Report. " * 100, + "sentiment_report": "Report. " * 100, + } + + result = validate_agent_state(state) + + assert result.metrics["reports_present"] == 2 + assert result.metrics["total_reports_expected"] == 4 diff --git a/tradingagents/utils/output_validator.py b/tradingagents/utils/output_validator.py new file mode 100644 index 00000000..079c8424 --- /dev/null +++ b/tradingagents/utils/output_validator.py @@ -0,0 +1,453 @@ +""" +Output validation utilities for agent outputs. + +This module provides validation functions for: +- Report completeness (length, structure, markdown formatting) +- Decision quality (signal extraction, reasoning clarity) +- Debate state coherence (history tracking, judge decisions) +- Complete agent state validation + +All validators return ValidationResult with actionable feedback. +""" + +from dataclasses import dataclass, field +from typing import List, Optional, Dict, Any +import re + + +@dataclass +class ValidationResult: + """ + Result of a validation check with actionable feedback. 
@dataclass
class ValidationResult:
    """
    Result of a validation check with actionable feedback.

    Attributes:
        is_valid: True if validation passed, False otherwise
        errors: List of error messages (validation failures)
        warnings: List of warning messages (quality concerns)
        metrics: Dictionary of measured metrics (e.g., length, counts)
    """
    is_valid: bool
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    metrics: Dict[str, Any] = field(default_factory=dict)

    def add_error(self, message: str) -> None:
        """Add an error and mark validation as failed."""
        self.errors.append(message)
        self.is_valid = False

    def add_warning(self, message: str) -> None:
        """Add a warning (doesn't fail validation)."""
        self.warnings.append(message)

    def add_metric(self, key: str, value: Any) -> None:
        """Add a measured metric."""
        self.metrics[key] = value


def validate_report_completeness(
    report: Optional[str],
    min_length: int = 500,
    require_markdown_tables: bool = False,
    require_sections: bool = False,
) -> ValidationResult:
    """
    Validate that a report is complete and well-structured.

    Args:
        report: The report text to validate
        min_length: Minimum character count required (default: 500)
        require_markdown_tables: Whether to require markdown tables
        require_sections: Whether to require section headers (##)

    Returns:
        ValidationResult with errors, warnings, and metrics

    Example:
        >>> result = validate_report_completeness("# Report\\n\\nThis is too short")
        >>> assert not result.is_valid
        >>> assert "below minimum" in result.errors[0].lower()
    """
    result = ValidationResult(is_valid=True)

    # Check if report exists
    if report is None:
        result.add_error("Report is None")
        return result

    if not isinstance(report, str):
        result.add_error(f"Report must be string, got {type(report).__name__}")
        return result

    # Length is measured on stripped text so pure-whitespace reports count as empty
    report_length = len(report.strip())
    result.add_metric("length", report_length)

    if report_length == 0:
        result.add_error("Report is empty")
        return result

    if report_length < min_length:
        result.add_error(
            f"Report length ({report_length}) below minimum ({min_length})"
        )

    # Markdown table rows: any line segment bracketed by pipes.
    # NOTE: this counts table ROWS (including separator rows), not whole tables.
    markdown_tables = re.findall(r'\|.*\|', report)
    result.add_metric("markdown_tables", len(markdown_tables))

    if require_markdown_tables and len(markdown_tables) == 0:
        result.add_error("Report missing required markdown tables")

    # Section headers (allow optional leading whitespace)
    section_headers = re.findall(r'^\s*#{1,6}\s+.+$', report, re.MULTILINE)
    result.add_metric("section_headers", len(section_headers))

    if require_sections and len(section_headers) == 0:
        result.add_error("Report missing required section headers")

    # FIX: only warn about borderline length when the hard minimum was met.
    # Previously a report failing min_length received both the error above
    # AND this redundant warning.
    if min_length <= report_length < min_length * 1.5:
        result.add_warning(
            f"Report is relatively short ({report_length} chars). "
            f"Consider adding more detail."
        )

    # Check for basic structure indicators
    has_bullet_points = bool(re.search(r'^\s*[-*]\s+', report, re.MULTILINE))
    result.add_metric("has_bullet_points", has_bullet_points)

    if not has_bullet_points and not markdown_tables:
        result.add_warning("Report lacks structured content (no bullets or tables)")

    return result


def validate_decision_quality(decision: Optional[str]) -> ValidationResult:
    """
    Validate trading decision quality and extract signal.

    Validates:
    - Decision is not None/empty
    - Contains clear BUY/SELL/HOLD signal
    - Has reasoning/explanation
    - Signal is unambiguous

    Args:
        decision: The decision text to validate

    Returns:
        ValidationResult with extracted signal in metrics

    Example:
        >>> result = validate_decision_quality("BUY: Strong fundamentals")
        >>> assert result.is_valid
        >>> assert result.metrics["signal"] == "BUY"
    """
    result = ValidationResult(is_valid=True)

    # Check if decision exists
    if decision is None:
        result.add_error("Decision is None")
        return result

    if not isinstance(decision, str):
        result.add_error(f"Decision must be string, got {type(decision).__name__}")
        return result

    decision_clean = decision.strip()
    if not decision_clean:
        result.add_error("Decision is empty")
        return result

    result.add_metric("length", len(decision_clean))

    # Extract trading signal (case-insensitive, whole words only)
    signal_pattern = r'\b(BUY|SELL|HOLD)\b'
    matches = re.findall(signal_pattern, decision_clean, re.IGNORECASE)

    if not matches:
        result.add_error(
            "No clear trading signal found (expected BUY, SELL, or HOLD)"
        )
        result.add_metric("signal", None)
        return result

    # Get first signal and normalize to uppercase
    signal = matches[0].upper()
    result.add_metric("signal", signal)
    result.add_metric("signal_count", len(matches))

    # Warn if multiple conflicting signals.
    # FIX: sort the signals so the warning text is deterministic — a raw set
    # interpolated into an f-string renders in hash-randomized order per run.
    unique_signals = set(m.upper() for m in matches)
    if len(unique_signals) > 1:
        result.add_warning(
            f"Multiple conflicting signals found: {sorted(unique_signals)}. "
            f"Using first occurrence: {signal}"
        )

    # Check for reasoning: a colon/period delimiter or at least 5 words
    # is treated as evidence of an explanation (heuristic).
    has_reasoning = any([
        ':' in decision_clean,
        '.' in decision_clean,
        len(decision_clean.split()) >= 5,
    ])

    result.add_metric("has_reasoning", has_reasoning)

    if not has_reasoning:
        result.add_warning(
            "Decision lacks clear reasoning or explanation"
        )

    # Check decision length
    if len(decision_clean) < 20:
        result.add_warning(
            f"Decision is very short ({len(decision_clean)} chars). "
            f"Consider adding more rationale."
        )

    return result


def validate_debate_state(
    debate_state: Optional[Dict[str, Any]],
    debate_type: str = "invest",
) -> ValidationResult:
    """
    Validate debate state structure and coherence.

    Validates:
    - Required fields present (history, count, judge_decision)
    - History is not empty
    - Count is reasonable (>= 0)
    - Judge decision exists if debate concluded

    Args:
        debate_state: The debate state dictionary to validate
        debate_type: Type of debate ("invest" or "risk")

    Returns:
        ValidationResult with debate metrics

    Example:
        >>> state = {"history": "Round 1...", "count": 1, "judge_decision": "BUY"}
        >>> result = validate_debate_state(state)
        >>> assert result.is_valid
    """
    result = ValidationResult(is_valid=True)

    # Check if state exists
    if debate_state is None:
        result.add_error("Debate state is None")
        return result

    if not isinstance(debate_state, dict):
        result.add_error(
            f"Debate state must be dict, got {type(debate_state).__name__}"
        )
        return result

    # Define required fields based on debate type
    if debate_type == "invest":
        required_fields = ["history", "count", "judge_decision"]
        optional_fields = ["bull_history", "bear_history", "current_response"]
    elif debate_type == "risk":
        required_fields = ["history", "count", "judge_decision"]
        optional_fields = [
            "risky_history",
            "safe_history",
            "neutral_history",
            "latest_speaker",
            "current_risky_response",
            "current_safe_response",
            "current_neutral_response",
        ]
    else:
        result.add_error(f"Unknown debate type: {debate_type}")
        return result

    # Check required fields
    missing_fields = [f for f in required_fields if f not in debate_state]
    if missing_fields:
        result.add_error(f"Missing required fields: {missing_fields}")
        return result

    # Validate history.
    # FIX: a required field present with value None previously skipped
    # validation silently; it is now surfaced as a warning.
    history = debate_state.get("history")
    if history is None:
        result.add_warning("History is None")
    elif not isinstance(history, str):
        result.add_error(
            f"History must be string, got {type(history).__name__}"
        )
    elif not history.strip():
        result.add_warning("History is empty")
    else:
        result.add_metric("history_length", len(history))

    # Validate count (same None fix as history)
    count = debate_state.get("count")
    if count is None:
        result.add_warning("Count is None")
    elif not isinstance(count, int):
        result.add_error(f"Count must be int, got {type(count).__name__}")
    elif count < 0:
        result.add_error(f"Count cannot be negative: {count}")
    else:
        result.add_metric("count", count)

        # Warn if debate went too long
        if count > 10:
            result.add_warning(
                f"Debate count is very high ({count}). "
                f"May indicate convergence issues."
            )

    # Validate judge decision (same None fix as history)
    judge_decision = debate_state.get("judge_decision")
    if judge_decision is None:
        result.add_warning("Judge decision is None")
    elif not isinstance(judge_decision, str):
        result.add_error(
            f"Judge decision must be string, got {type(judge_decision).__name__}"
        )
    elif not judge_decision.strip():
        result.add_warning("Judge decision is empty")
    else:
        # Validate decision quality via the dedicated validator
        decision_result = validate_decision_quality(judge_decision)
        if not decision_result.is_valid:
            result.add_warning(
                f"Judge decision has quality issues: "
                f"{', '.join(decision_result.errors)}"
            )
        else:
            result.add_metric("judge_signal", decision_result.metrics.get("signal"))

    # Check optional fields for completeness
    present_optional = [f for f in optional_fields if f in debate_state]
    result.add_metric("optional_fields_present", len(present_optional))

    return result
def validate_agent_state(state: Optional[Dict[str, Any]]) -> ValidationResult:
    """
    Validate a complete agent state dictionary.

    Delegates to the specialized validators to check:
    - company_of_interest and trade_date are present
    - each of the four analyst reports, when present, is complete
    - investment and risk debate states are well-formed
    - the final trade decision carries a clear signal

    Args:
        state: The complete agent state dictionary

    Returns:
        ValidationResult aggregating errors, warnings, and metrics

    Example:
        >>> state = {
        ...     "company_of_interest": "AAPL",
        ...     "trade_date": "2024-01-15",
        ...     "market_report": "Market analysis..." * 100,
        ... }
        >>> result = validate_agent_state(state)
        >>> assert "company_of_interest" in result.metrics
    """
    result = ValidationResult(is_valid=True)

    # Reject absent or mistyped state outright.
    if state is None:
        result.add_error("Agent state is None")
        return result
    if not isinstance(state, dict):
        result.add_error(f"Agent state must be dict, got {type(state).__name__}")
        return result

    # Basic identification fields: record as metrics when present,
    # error otherwise (order: company first, then trade_date).
    for key, missing_msg in (
        ("company_of_interest", "Missing company_of_interest"),
        ("trade_date", "Missing trade_date"),
    ):
        value = state.get(key)
        if value:
            result.add_metric(key, value)
        else:
            result.add_error(missing_msg)

    # Analyst reports: count what is present and surface per-report issues
    # as warnings (a bad report does not invalidate the whole state).
    expected_reports = (
        "market_report",
        "sentiment_report",
        "news_report",
        "fundamentals_report",
    )

    present_count = 0
    for report_name in expected_reports:
        text = state.get(report_name)
        if not text:
            continue
        present_count += 1
        check = validate_report_completeness(
            text,
            min_length=500,
            require_markdown_tables=False,
            require_sections=False,
        )
        if not check.is_valid:
            result.add_warning(
                f"{report_name} has issues: {', '.join(check.errors)}"
            )

    result.add_metric("reports_present", present_count)
    result.add_metric("total_reports_expected", len(expected_reports))

    if present_count < len(expected_reports):
        result.add_warning(
            f"Only {present_count}/{len(expected_reports)} reports present"
        )

    # Investment debate: validate only when present; record validity metric.
    investment_state = state.get("investment_debate_state")
    if investment_state:
        invest_check = validate_debate_state(investment_state, debate_type="invest")
        if not invest_check.is_valid:
            result.add_warning(
                f"Investment debate has issues: {', '.join(invest_check.errors)}"
            )
        result.add_metric("investment_debate_valid", invest_check.is_valid)

    # Risk debate: same treatment.
    risk_state = state.get("risk_debate_state")
    if risk_state:
        risk_check = validate_debate_state(risk_state, debate_type="risk")
        if not risk_check.is_valid:
            result.add_warning(
                f"Risk debate has issues: {', '.join(risk_check.errors)}"
            )
        result.add_metric("risk_debate_valid", risk_check.is_valid)

    # Final decision: extract the signal when valid, warn otherwise.
    verdict = state.get("final_trade_decision")
    if verdict:
        decision_check = validate_decision_quality(verdict)
        if decision_check.is_valid:
            result.add_metric("final_signal", decision_check.metrics.get("signal"))
        else:
            result.add_warning(
                f"Final decision has issues: {', '.join(decision_check.errors)}"
            )

    # Overall completeness: a state with neither debate is suspicious.
    if not investment_state and not risk_state:
        result.add_warning(
            "State appears incomplete: no debate states present"
        )

    return result