feat(tests): add UAT and evaluation tests for agent outputs - Fixes #53

- Created tradingagents/utils/output_validator.py with ValidationResult dataclass

- Added validate_report_completeness(), validate_decision_quality() for content validation

- Added validate_debate_state(), validate_agent_state() for state coherence

- Created tests/unit/test_output_validators.py with 54 unit tests

- Created tests/e2e/test_uat_agent_outputs.py with 23 UAT scenarios

- Added agent state fixtures to tests/conftest.py (sample_agent_state, debates)

- Total: 77 tests covering report quality, signal extraction, and state integrity

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Andrew Kaszubski 2025-12-26 11:38:37 +11:00
parent b4653ca37b
commit e5575250df
4 changed files with 1916 additions and 0 deletions

View File

@@ -383,3 +383,271 @@ def openrouter_config():
"backend_url": "https://openrouter.ai/api/v1",
})
return config
# ============================================================================
# Agent Output Validation Fixtures (Issue #53)
# ============================================================================
@pytest.fixture
def sample_agent_state() -> dict:
    """
    Create a complete sample agent state for testing.

    Provides a fully populated agent state with all required fields
    including reports, debate states, and final decision.

    Scope: function (default)

    Returns:
        dict: Complete agent state with all fields populated

    Example:
        def test_complete_state(sample_agent_state):
            assert sample_agent_state["company_of_interest"] == "AAPL"
            assert "market_report" in sample_agent_state
    """
    # Each report is padded with a repeated filler sentence so its length
    # clears the validators' minimum-length checks (tests call
    # validate_report_completeness with min_length=500); the markdown
    # headers/tables give the structure the validators look for.
    return {
        "company_of_interest": "AAPL",
        "trade_date": "2024-01-15",
        "market_report": """
# Market Analysis for AAPL
## Technical Indicators
Strong bullish momentum with RSI at 55 and MACD showing positive divergence.
Price has broken through key resistance at $175.
## Volume Analysis
Above-average volume on recent upward moves indicates strong buyer interest.
Institutional accumulation pattern observed over the past 2 weeks.
## Price Action
Clear higher highs and higher lows pattern establishing uptrend.
Support level established at $170 with strong buying pressure.
""" + "Additional detailed analysis. " * 30,
        "sentiment_report": """
# Social Media Sentiment Analysis
## Overall Sentiment
Strongly positive sentiment across major platforms (Twitter, Reddit, StockTwits).
Sentiment score: 8.5/10 based on 10,000+ analyzed posts.
## Key Themes
- New product launch excitement
- Strong quarterly earnings anticipation
- Innovation leadership recognition
## Influencer Activity
Major tech influencers bullish on near-term prospects.
""" + "More sentiment details. " * 30,
        "news_report": """
# News Analysis
## Recent Headlines
- Major product announcement driving positive coverage
- Analyst upgrades from 3 top firms this week
- Partnership announcements in AI space
## Coverage Tone
85% positive, 10% neutral, 5% negative across 50 major news sources.
## Impact Assessment
News flow strongly supportive of bullish thesis.
""" + "Additional news analysis. " * 30,
        "fundamentals_report": """
# Fundamental Analysis
## Financial Metrics
| Metric | Value | Industry Avg |
|--------|-------|--------------|
| P/E | 28 | 25 |
| ROE | 45% | 20% |
| Revenue Growth | 12% | 8% |
## Balance Sheet
Strong cash position of $150B, low debt-to-equity ratio.
## Earnings Quality
Consistent earnings growth with strong margins.
""" + "Detailed fundamental analysis. " * 30,
        # Three-round investment debate; judge_decision leads with the
        # signal token ("BUY: ...") that downstream signal extraction reads.
        "investment_debate_state": {
            "history": "Round 1: Bull presents case for strong buy...\nRound 2: Bear raises concerns about valuation...\nRound 3: Bull counters with growth prospects...",
            "count": 3,
            "judge_decision": "BUY: Bulls made a compelling case with strong fundamentals and positive momentum",
            "bull_history": "Strong fundamentals, positive momentum, innovation leadership",
            "bear_history": "Slight valuation concerns, market volatility risk",
            "current_response": "Final recommendation is BUY",
        },
        # Two-round risk debate with per-analyst histories and responses.
        "risk_debate_state": {
            "history": "Round 1: Risk assessment begins...\nRound 2: Conservative view presented...",
            "count": 2,
            "judge_decision": "BUY: Risk is acceptable given strong fundamentals",
            "risky_history": "High potential upside justifies position",
            "safe_history": "Proceed with caution, good fundamentals",
            "neutral_history": "Balanced risk-reward at current levels",
            "latest_speaker": "neutral",
            "current_risky_response": "Strong buy",
            "current_safe_response": "Moderate buy",
            "current_neutral_response": "Buy with standard position sizing",
        },
        "final_trade_decision": "BUY: Strong consensus across all analysis teams. Fundamentals solid, technicals bullish, sentiment positive. Entry at current levels recommended with standard position sizing.",
        "investment_plan": "Initiate position with 2% portfolio allocation",
        "trader_investment_plan": "Execute market order for calculated position size",
        "sender": "trader",
    }
@pytest.fixture
def sample_agent_state_buy(sample_agent_state) -> dict:
    """
    Sample agent state with BUY decision.

    Returns a deep copy of the complete BUY-scenario state so that a test
    mutating this fixture cannot leak changes into ``sample_agent_state``
    when both fixtures are requested by the same test.

    Scope: function (default)

    Returns:
        dict: Complete agent state whose final_trade_decision is BUY

    Example:
        def test_buy_scenario(sample_agent_state_buy):
            assert "BUY" in sample_agent_state_buy["final_trade_decision"]
    """
    # Previously this fixture returned the *same* dict object as
    # sample_agent_state; a deep copy guarantees test isolation without
    # changing any of the content assertions made by callers.
    import copy

    return copy.deepcopy(sample_agent_state)
@pytest.fixture
def sample_agent_state_sell() -> dict:
    """
    Sample agent state with SELL decision.

    Provides a complete state where all analyses point to SELL.

    Scope: function (default)

    Returns:
        dict: Agent state with SELL decision

    Example:
        def test_sell_scenario(sample_agent_state_sell):
            assert "SELL" in sample_agent_state_sell["final_trade_decision"]
    """
    # Reports are padded with repeated filler so they exceed the validators'
    # minimum-length threshold; every judge decision and the final decision
    # lead with "SELL" so signal extraction resolves to SELL.
    return {
        "company_of_interest": "TSLA",
        "trade_date": "2024-01-20",
        "market_report": "# Market Analysis\n\nBearish technical pattern with breakdown below support. " + "Detailed analysis. " * 50,
        "sentiment_report": "# Sentiment Analysis\n\nNegative sentiment prevailing across platforms. " + "More details. " * 50,
        "news_report": "# News Report\n\nMultiple negative headlines and analyst downgrades. " + "Additional coverage. " * 50,
        "fundamentals_report": "# Fundamentals\n\nDeteriorating metrics and earnings concerns. " + "Financial details. " * 50,
        # Two-round investment debate won by the bear case.
        "investment_debate_state": {
            "history": "Round 1: Bear presents strong sell case...\nRound 2: Bull unable to counter effectively...",
            "count": 2,
            "judge_decision": "SELL: Bears made compelling case with fundamental concerns",
            "bull_history": "Limited upside potential",
            "bear_history": "Strong downside risk, overvalued",
        },
        # Single-round risk debate recommending exit.
        "risk_debate_state": {
            "history": "Round 1: Risk analysis shows high downside...",
            "count": 1,
            "judge_decision": "SELL: Exit position to preserve capital",
            "risky_history": "Too risky, exit recommended",
            "safe_history": "Definitely sell",
            "neutral_history": "Sell is prudent",
        },
        "final_trade_decision": "SELL: Consensus to exit position. Fundamentals weak, technicals bearish, sentiment negative.",
    }
@pytest.fixture
def sample_agent_state_hold() -> dict:
    """
    Sample agent state with HOLD decision.

    Provides a complete state where analyses are mixed, leading to HOLD.

    Scope: function (default)

    Returns:
        dict: Agent state with HOLD decision

    Example:
        def test_hold_scenario(sample_agent_state_hold):
            assert "HOLD" in sample_agent_state_hold["final_trade_decision"]
    """
    # Mixed/neutral narratives throughout; the judge decisions and the final
    # decision all lead with "HOLD" so signal extraction resolves to HOLD.
    return {
        "company_of_interest": "GOOGL",
        "trade_date": "2024-01-22",
        "market_report": "# Market Analysis\n\nMixed signals with consolidation pattern. " + "Technical details. " * 50,
        "sentiment_report": "# Sentiment Analysis\n\nNeutral sentiment, market awaiting catalyst. " + "Sentiment data. " * 50,
        "news_report": "# News Report\n\nBalanced news flow, no major catalysts. " + "News details. " * 50,
        "fundamentals_report": "# Fundamentals\n\nSolid but not compelling, fairly valued. " + "Financial data. " * 50,
        # Three inconclusive investment-debate rounds.
        "investment_debate_state": {
            "history": "Round 1: Bull and Bear present balanced views...\nRound 2: No clear winner...\nRound 3: Continued debate...",
            "count": 3,
            "judge_decision": "HOLD: Insufficient conviction either way, maintain position",
            "bull_history": "Some positives but not strong",
            "bear_history": "Some concerns but not severe",
        },
        # Single balanced risk-debate round.
        "risk_debate_state": {
            "history": "Round 1: Risk assessment shows balanced profile...",
            "count": 1,
            "judge_decision": "HOLD: Risk-reward balanced, no action needed",
            "risky_history": "Could go either way",
            "safe_history": "Wait for clarity",
            "neutral_history": "Hold is appropriate",
        },
        "final_trade_decision": "HOLD: Mixed signals across analysis teams. Await further clarity before making move.",
    }
@pytest.fixture
def sample_invest_debate() -> dict:
    """
    Sample investment debate state.

    Provides a complete investment debate state for isolated testing.

    Scope: function (default)

    Returns:
        dict: Investment debate state (InvestDebateState)

    Example:
        def test_debate(sample_invest_debate):
            assert sample_invest_debate["count"] > 0
    """
    return {
        # Three newline-separated rounds; "count" mirrors the round count.
        "history": "Round 1: Bull argues for strong buy based on fundamentals...\nRound 2: Bear raises valuation concerns...\nRound 3: Bull counters with growth prospects...",
        "count": 3,
        # judge_decision leads with the signal token that extraction reads.
        "judge_decision": "BUY: Bulls presented stronger evidence",
        "bull_history": "Strong fundamentals, positive technicals, good sentiment",
        "bear_history": "Valuation slightly stretched, some market risk",
        "current_response": "Recommend BUY with conviction",
    }
@pytest.fixture
def sample_risk_debate() -> dict:
    """
    Sample risk debate state.

    Provides a complete risk debate state for isolated testing.

    Scope: function (default)

    Returns:
        dict: Risk debate state (RiskDebateState)

    Example:
        def test_risk_debate(sample_risk_debate):
            assert sample_risk_debate["count"] > 0
    """
    return {
        # Two newline-separated rounds; "count" mirrors the round count.
        "history": "Round 1: Risk analysts evaluate position sizing...\nRound 2: Discussion on risk parameters...",
        "count": 2,
        # judge_decision leads with the signal token that extraction reads.
        "judge_decision": "BUY: Risk acceptable with standard position size",
        # One history entry per risk persona (risky / safe / neutral).
        "risky_history": "Aggressive position justified by strong signals",
        "safe_history": "Conservative position appropriate given uncertainty",
        "neutral_history": "Standard position sizing recommended",
        "latest_speaker": "neutral",
        "current_risky_response": "Take larger position",
        "current_safe_response": "Take smaller position",
        "current_neutral_response": "Standard position is balanced",
    }

View File

@@ -0,0 +1,495 @@
"""
UAT (User Acceptance Testing) for Agent Output Quality.
This module provides end-to-end tests for complete agent workflows:
1. Complete analysis workflow (BUY/SELL/HOLD scenarios)
2. Edge case handling (missing data, conflicting reports)
3. Content quality validation (length, structure, clarity)
4. State integrity checks (field presence, debate coherence)
All tests use mocked data to avoid real API calls.
"""
import pytest
from typing import Dict, Any
from tradingagents.utils.output_validator import (
validate_agent_state,
validate_decision_quality,
validate_debate_state,
validate_report_completeness,
)
pytestmark = pytest.mark.e2e
# ============================================================================
# Test Complete Analysis Workflow
# ============================================================================
class TestCompleteAnalysisWorkflow:
    """End-to-end checks of the full agent analysis workflow per scenario."""

    def test_buy_scenario_complete_workflow(self, sample_agent_state_buy):
        """
        Validate the complete BUY scenario.

        All four reports must be present, both debates must validate, and
        the extracted final signal must be BUY for AAPL.
        """
        outcome = validate_agent_state(sample_agent_state_buy)
        assert outcome.is_valid is True
        assert outcome.metrics["company_of_interest"] == "AAPL"
        assert outcome.metrics["reports_present"] == 4
        assert outcome.metrics["final_signal"] == "BUY"
        assert outcome.metrics["investment_debate_valid"] is True
        assert outcome.metrics["risk_debate_valid"] is True

    def test_sell_scenario_complete_workflow(self, sample_agent_state_sell):
        """
        Validate the complete SELL scenario: all reports present and the
        extracted final signal is SELL.
        """
        outcome = validate_agent_state(sample_agent_state_sell)
        assert outcome.is_valid is True
        assert outcome.metrics["final_signal"] == "SELL"
        assert outcome.metrics["reports_present"] == 4

    def test_hold_scenario_complete_workflow(self, sample_agent_state_hold):
        """Validate that the mixed-signal scenario resolves to a HOLD signal."""
        outcome = validate_agent_state(sample_agent_state_hold)
        assert outcome.is_valid is True
        assert outcome.metrics["final_signal"] == "HOLD"

    def test_workflow_preserves_debate_history(self, sample_agent_state_buy):
        """Both debate states must carry non-empty history and round counts."""
        debates = {
            "invest": sample_agent_state_buy["investment_debate_state"],
            "risk": sample_agent_state_buy["risk_debate_state"],
        }
        verdicts = {
            kind: validate_debate_state(debate, debate_type=kind)
            for kind, debate in debates.items()
        }
        assert verdicts["invest"].metrics["history_length"] > 0
        assert verdicts["risk"].metrics["history_length"] > 0
        assert verdicts["invest"].metrics["count"] > 0
        assert verdicts["risk"].metrics["count"] > 0

    def test_workflow_all_reports_meet_quality_standards(self, sample_agent_state_buy):
        """Every generated report must clear the baseline quality bar."""
        report_keys = (
            "market_report",
            "sentiment_report",
            "news_report",
            "fundamentals_report",
        )
        for key in report_keys:
            verdict = validate_report_completeness(
                sample_agent_state_buy[key],
                min_length=500,
                require_markdown_tables=False,
                require_sections=False,
            )
            assert verdict.is_valid is True
            assert verdict.metrics["length"] >= 500
# ============================================================================
# Test Edge Case Scenarios
# ============================================================================
class TestEdgeCaseScenarios:
    """Test handling of edge cases and unusual scenarios."""

    def test_missing_single_report_graceful_degradation(self):
        """Test that workflow continues with one missing report."""
        state = {
            "company_of_interest": "TSLA",
            "trade_date": "2024-01-20",
            "market_report": "Market analysis. " * 100,
            "sentiment_report": "Sentiment analysis. " * 100,
            "news_report": "News analysis. " * 100,
            # Missing fundamentals_report
            "investment_debate_state": {
                "history": "Debate based on available data",
                "count": 3,
                "judge_decision": "HOLD: Incomplete data, proceeding cautiously",
            },
            "risk_debate_state": {
                "history": "Risk assessment",
                "count": 2,
                "judge_decision": "HOLD: Missing fundamentals increases uncertainty",
            },
            "final_trade_decision": "HOLD: Awaiting fundamental data",
        }
        result = validate_agent_state(state)
        # Should still be valid but with warnings
        assert result.is_valid is True
        assert result.metrics["reports_present"] == 3
        assert len(result.warnings) > 0

    def test_conflicting_debate_conclusions_warning(self):
        """Test warning when investment and risk debates conflict."""
        state = {
            "company_of_interest": "GOOGL",
            "trade_date": "2024-01-22",
            "market_report": "Report. " * 100,
            "sentiment_report": "Report. " * 100,
            "news_report": "Report. " * 100,
            "fundamentals_report": "Report. " * 100,
            "investment_debate_state": {
                "history": "Bullish debate",
                "count": 2,
                "judge_decision": "BUY: Strong upside potential",
            },
            "risk_debate_state": {
                "history": "Risk concerns",
                "count": 2,
                "judge_decision": "SELL: Risk too high",  # Conflicts with invest
            },
            "final_trade_decision": "HOLD: Conflicting signals from teams",
        }
        result = validate_agent_state(state)
        assert result.is_valid is True
        # Different signals detected
        assert result.metrics.get("final_signal") == "HOLD"

    def test_empty_debate_history_but_valid_decision(self):
        """Test handling of empty debate history with valid decision."""
        state = {
            "company_of_interest": "MSFT",
            "trade_date": "2024-01-25",
            "market_report": "Report. " * 100,
            "investment_debate_state": {
                "history": "",  # Empty history
                "count": 0,
                "judge_decision": "HOLD: Insufficient deliberation",
            },
            "final_trade_decision": "HOLD: More analysis needed",
        }
        result = validate_agent_state(state)
        assert result.is_valid is True
        assert len(result.warnings) > 0  # Should warn about empty history

    def test_very_long_debate_convergence_issue(self):
        """Test detection of debates that went too long."""
        state = {
            "company_of_interest": "NVDA",
            "trade_date": "2024-01-28",
            "market_report": "Report. " * 100,
            "investment_debate_state": {
                "history": "Round 1...\nRound 2...\n" * 15,
                "count": 15,  # Very high count
                "judge_decision": "BUY: Finally reached consensus",
            },
            "final_trade_decision": "BUY: After extensive deliberation",
        }
        result = validate_agent_state(state)
        assert result.is_valid is True
        # Should have warnings about high debate count
        invest_debate_result = validate_debate_state(
            state["investment_debate_state"],
            debate_type="invest"
        )
        assert len(invest_debate_result.warnings) > 0

    def test_malformed_but_extractable_decision(self):
        """Test extraction of signal from poorly formatted decision."""
        # None of these lead with a clean "BUY:" prefix, yet the signal
        # should still be extracted case-insensitively from each.
        decisions = [
            "i think we should BUY this stock",
            "recommendation: buy",
            "buy!!!",
            "Final call is to buy the position",
        ]
        for decision in decisions:
            result = validate_decision_quality(decision)
            assert result.metrics["signal"] == "BUY"

    def test_missing_all_debate_states(self):
        """Test handling when no debates occurred."""
        state = {
            "company_of_interest": "META",
            "trade_date": "2024-02-01",
            "market_report": "Report. " * 100,
            # No debate states
            "final_trade_decision": "HOLD: No consensus reached",
        }
        result = validate_agent_state(state)
        assert result.is_valid is True
        assert len(result.warnings) > 0
        # Missing debates should be reported as an "incomplete" warning.
        assert any("incomplete" in w.lower() for w in result.warnings)
# ============================================================================
# Test Content Quality
# ============================================================================
class TestContentQuality:
    """Test content quality validation across all outputs."""

    def test_report_minimum_length_enforcement(self):
        """Test that all reports meet minimum length requirements."""
        # All of these are far below the 500-character minimum.
        short_reports = [
            "Too short",
            "Also short",
            "Brief",
        ]
        for report in short_reports:
            result = validate_report_completeness(report, min_length=500)
            assert result.is_valid is False

    def test_report_markdown_structure_quality(self):
        """Test that well-structured reports are recognized."""
        # Contains headers, a markdown table, and bullet points so that all
        # structural metrics are exercised at once.
        well_structured_report = """
# Market Analysis for AAPL
## Executive Summary
Strong buy signal based on comprehensive analysis.
## Technical Indicators
| Indicator | Value | Signal |
|-----------|-------|--------|
| RSI | 45 | Neutral|
| MACD | +2.3 | Buy |
## Fundamental Analysis
- Revenue growth: 15% YoY
- P/E ratio: 25 (reasonable for tech)
- Strong balance sheet
## Conclusion
""" + "Detailed conclusion. " * 50
        result = validate_report_completeness(
            well_structured_report,
            min_length=500,
            require_markdown_tables=True,
            require_sections=True,
        )
        assert result.is_valid is True
        assert result.metrics["markdown_tables"] > 0
        assert result.metrics["section_headers"] >= 3
        assert result.metrics["has_bullet_points"] is True

    def test_decision_clarity_with_reasoning(self):
        """Test that clear decisions with reasoning are validated."""
        clear_decisions = [
            "BUY: Strong fundamentals (P/E 20), positive momentum (RSI 55), bullish sentiment",
            "SELL: Overvalued at current P/E of 45, declining revenue, negative news",
            "HOLD: Mixed signals - good fundamentals but uncertain market conditions",
        ]
        for decision in clear_decisions:
            result = validate_decision_quality(decision)
            assert result.is_valid is True
            assert result.metrics["has_reasoning"] is True
            assert len(result.warnings) == 0  # Clear decisions shouldn't warn

    def test_decision_ambiguity_detection(self):
        """Test detection of ambiguous decisions."""
        # Each decision contains more than one signal token.
        ambiguous_decisions = [
            "BUY or SELL, not sure",
            "Maybe HOLD, could be BUY",
            "SELL but also considering BUY",
        ]
        for decision in ambiguous_decisions:
            result = validate_decision_quality(decision)
            # Should still extract first signal
            assert result.metrics["signal"] is not None
            # But should warn about ambiguity
            assert len(result.warnings) > 0

    def test_report_content_variety_indicators(self):
        """Test that reports with varied content structure are recognized."""
        varied_report = """
# Comprehensive Analysis
## Overview
Multiple content types present.
## Data Table
| Metric | Q1 | Q2 | Q3 | Q4 |
|--------|----|----|----|----|
| Revenue| 10M| 12M| 15M| 18M|
## Key Points
- Point 1
- Point 2
* Point 3
## Details
""" + "Additional detailed analysis. " * 50
        result = validate_report_completeness(varied_report, min_length=500)
        assert result.is_valid is True
        assert result.metrics["markdown_tables"] > 0
        assert result.metrics["section_headers"] > 0
        assert result.metrics["has_bullet_points"] is True
        # No warnings about lacking structure
        assert not any("structured" in w.lower() for w in result.warnings)
# ============================================================================
# Test State Integrity
# ============================================================================
class TestStateIntegrity:
    """Consistency and integrity checks on the agent state structure."""

    def test_all_required_fields_present(self, sample_agent_state_buy):
        """Every mandatory key must exist in a fully populated state."""
        expected_keys = (
            "company_of_interest",
            "trade_date",
            "market_report",
            "sentiment_report",
            "news_report",
            "fundamentals_report",
            "investment_debate_state",
            "risk_debate_state",
            "final_trade_decision",
        )
        for key in expected_keys:
            assert key in sample_agent_state_buy, f"Missing required field: {key}"

    def test_debate_state_internal_consistency(self, sample_invest_debate):
        """A valid debate must report a positive round count and history size."""
        verdict = validate_debate_state(sample_invest_debate, debate_type="invest")
        assert verdict.is_valid is True
        # Count should match history length (approximately)
        assert verdict.metrics["count"] > 0
        assert verdict.metrics["history_length"] > 0

    def test_final_decision_aligns_with_debates(self, sample_agent_state_buy):
        """The final signal must agree with both debates' judge decisions."""
        invest_verdict = validate_debate_state(
            sample_agent_state_buy["investment_debate_state"], debate_type="invest"
        )
        risk_verdict = validate_debate_state(
            sample_agent_state_buy["risk_debate_state"], debate_type="risk"
        )
        decision_verdict = validate_decision_quality(
            sample_agent_state_buy["final_trade_decision"]
        )
        # All should be BUY for this scenario
        assert invest_verdict.metrics.get("judge_signal") == "BUY"
        assert risk_verdict.metrics.get("judge_signal") in ["BUY", "HOLD"]
        assert decision_verdict.metrics["signal"] == "BUY"

    def test_state_preserves_company_context(self, sample_agent_state_buy):
        """Ticker symbol and trade date must be non-empty strings."""
        for key in ("company_of_interest", "trade_date"):
            value = sample_agent_state_buy[key]
            assert isinstance(value, str)
            assert len(value) > 0

    def test_debate_history_chronological_consistency(self, sample_invest_debate):
        """History must exist and be sized plausibly for the round count."""
        transcript = sample_invest_debate["history"]
        rounds = sample_invest_debate["count"]
        # History should exist if count > 0
        if rounds > 0:
            assert len(transcript) > 0
        # If multiple rounds, history should reflect that
        if rounds >= 2:
            # Should have multiple segments or rounds
            assert len(transcript) > 50  # Reasonable minimum for 2+ rounds

    def test_type_consistency_across_state(self, sample_agent_state_buy):
        """String-valued fields must be str; debate states must be dicts."""
        text_keys = (
            "company_of_interest",
            "trade_date",
            "market_report",
            "sentiment_report",
            "news_report",
            "fundamentals_report",
            "final_trade_decision",
        )
        for key in text_keys:
            if key in sample_agent_state_buy:
                assert isinstance(sample_agent_state_buy[key], str), f"{key} should be string"
        for key in ("investment_debate_state", "risk_debate_state"):
            if key in sample_agent_state_buy:
                assert isinstance(sample_agent_state_buy[key], dict), f"{key} should be dict"

    def test_empty_state_detection(self):
        """A completely empty state dict must fail validation outright."""
        verdict = validate_agent_state({})
        assert verdict.is_valid is False
        assert len(verdict.errors) >= 2  # At least missing company and date

View File

@@ -0,0 +1,700 @@
"""
Test suite for Output Validation Utilities.
This module tests:
1. ValidationResult dataclass behavior
2. Report completeness validation (length, markdown, sections)
3. Decision quality validation (signal extraction, reasoning)
4. Debate state validation (history, count, judge_decision)
5. Complete agent state validation (orchestration)
All tests use mocked data (no real API calls).
"""
import pytest
from typing import Dict, Any
from tradingagents.utils.output_validator import (
ValidationResult,
validate_report_completeness,
validate_decision_quality,
validate_debate_state,
validate_agent_state,
)
pytestmark = pytest.mark.unit
# ============================================================================
# Test ValidationResult Dataclass
# ============================================================================
class TestValidationResult:
    """Behavioral tests for the ValidationResult dataclass."""

    def test_default_valid_result(self):
        """A freshly built valid result has empty errors/warnings/metrics."""
        res = ValidationResult(is_valid=True)
        assert res.is_valid is True
        assert res.errors == []
        assert res.warnings == []
        assert res.metrics == {}

    def test_add_error_marks_invalid(self):
        """Recording an error flips the result to invalid and stores the message."""
        res = ValidationResult(is_valid=True)
        res.add_error("Something went wrong")
        assert res.is_valid is False
        assert len(res.errors) == 1
        assert res.errors[0] == "Something went wrong"

    def test_add_warning_keeps_valid(self):
        """Warnings accumulate without affecting validity."""
        res = ValidationResult(is_valid=True)
        res.add_warning("This could be better")
        assert res.is_valid is True
        assert len(res.warnings) == 1
        assert res.warnings[0] == "This could be better"

    def test_add_metric(self):
        """Metrics are stored as key/value pairs."""
        res = ValidationResult(is_valid=True)
        res.add_metric("length", 500)
        res.add_metric("signal", "BUY")
        assert res.metrics["length"] == 500
        assert res.metrics["signal"] == "BUY"

    def test_multiple_errors_and_warnings(self):
        """Several errors and warnings can be accumulated on one result."""
        res = ValidationResult(is_valid=True)
        for message in ("Error 1", "Error 2"):
            res.add_error(message)
        for message in ("Warning 1", "Warning 2"):
            res.add_warning(message)
        assert res.is_valid is False
        assert len(res.errors) == 2
        assert len(res.warnings) == 2
# ============================================================================
# Test Report Validation
# ============================================================================
class TestReportValidation:
    """Test validate_report_completeness() function."""

    def test_valid_report_passes(self):
        """Test that a valid report passes validation."""
        report = "# Market Analysis\n\n" + "This is a comprehensive report. " * 50
        result = validate_report_completeness(report, min_length=500)
        assert result.is_valid is True
        assert len(result.errors) == 0
        assert result.metrics["length"] > 500

    def test_none_report_fails(self):
        """Test that None report fails validation."""
        result = validate_report_completeness(None)
        assert result.is_valid is False
        assert "None" in result.errors[0]

    def test_empty_report_fails(self):
        """Test that empty report fails validation."""
        result = validate_report_completeness("")
        assert result.is_valid is False
        assert "empty" in result.errors[0].lower()

    def test_short_report_fails(self):
        """Test that report below min_length fails."""
        short_report = "Too short"
        result = validate_report_completeness(short_report, min_length=500)
        assert result.is_valid is False
        assert any("minimum" in err.lower() for err in result.errors)
        assert result.metrics["length"] < 500

    def test_wrong_type_fails(self):
        """Test that non-string report fails validation."""
        result = validate_report_completeness(123)
        assert result.is_valid is False
        assert "string" in result.errors[0].lower()

    def test_markdown_table_detection(self):
        """Test detection of markdown tables."""
        report_with_table = """
# Analysis
| Metric | Value |
|--------|-------|
| Price | $100 |
| Volume | 1M |
""" + "Additional text. " * 50
        result = validate_report_completeness(
            report_with_table,
            min_length=200,
            require_markdown_tables=True
        )
        assert result.is_valid is True
        assert result.metrics["markdown_tables"] > 0

    def test_missing_markdown_table_fails_when_required(self):
        """Test that missing markdown tables fails when required."""
        report = "# Analysis\n\n" + "No tables here. " * 50
        result = validate_report_completeness(
            report,
            min_length=200,
            require_markdown_tables=True
        )
        assert result.is_valid is False
        assert any("table" in err.lower() for err in result.errors)

    def test_section_header_detection(self):
        """Test detection of section headers."""
        # Three levels of markdown headers (#, ##, ###).
        report_with_headers = """
# Main Title
## Subsection
### Details
Content here.
""" + "More content. " * 50
        result = validate_report_completeness(
            report_with_headers,
            min_length=200,
            require_sections=True
        )
        assert result.is_valid is True
        assert result.metrics["section_headers"] >= 3

    def test_missing_sections_fails_when_required(self):
        """Test that missing sections fails when required."""
        report = "Just plain text. " * 50
        result = validate_report_completeness(
            report,
            min_length=200,
            require_sections=True
        )
        assert result.is_valid is False
        assert any("section" in err.lower() for err in result.errors)

    def test_short_report_warning(self):
        """Test warning for relatively short reports."""
        # Report is above min but below 1.5x min
        report = "Short but valid. " * 40  # ~680 chars
        result = validate_report_completeness(report, min_length=500)
        assert result.is_valid is True
        assert len(result.warnings) > 0
        assert any("short" in warn.lower() for warn in result.warnings)

    def test_bullet_point_detection(self):
        """Test detection of bullet points."""
        # Mixes "-" and "*" bullet styles.
        report_with_bullets = """
# Analysis
- Point 1
- Point 2
* Point 3
""" + "Additional content. " * 50
        result = validate_report_completeness(report_with_bullets, min_length=200)
        assert result.metrics["has_bullet_points"] is True

    def test_unstructured_content_warning(self):
        """Test warning for content lacking structure."""
        unstructured_report = "Just a long stream of text without any structure. " * 50
        result = validate_report_completeness(unstructured_report, min_length=500)
        assert result.is_valid is True
        assert any("structured" in warn.lower() for warn in result.warnings)
# ============================================================================
# Test Decision Validation
# ============================================================================
class TestDecisionValidation:
    """Unit tests for validate_decision_quality()."""

    def test_valid_buy_decision(self):
        """A BUY verdict with rationale validates cleanly."""
        res = validate_decision_quality("BUY: Strong fundamentals and positive momentum")
        assert res.is_valid is True
        assert res.metrics["signal"] == "BUY"
        assert res.metrics["has_reasoning"] is True

    def test_valid_sell_decision(self):
        """A SELL verdict with rationale validates cleanly."""
        res = validate_decision_quality("SELL: Overvalued with deteriorating fundamentals")
        assert res.is_valid is True
        assert res.metrics["signal"] == "SELL"

    def test_valid_hold_decision(self):
        """A HOLD verdict with rationale validates cleanly."""
        res = validate_decision_quality("HOLD: Mixed signals, awaiting clarity")
        assert res.is_valid is True
        assert res.metrics["signal"] == "HOLD"

    def test_case_insensitive_signal_extraction(self):
        """Signal extraction must not depend on letter case."""
        variants = (
            "buy the stock",
            "BUY the stock",
            "Buy the stock",
            "We should buy",
        )
        for text in variants:
            res = validate_decision_quality(text)
            assert res.metrics["signal"] == "BUY"

    def test_none_decision_fails(self):
        """None input is rejected outright."""
        res = validate_decision_quality(None)
        assert res.is_valid is False
        assert "None" in res.errors[0]

    def test_empty_decision_fails(self):
        """An empty string is rejected outright."""
        res = validate_decision_quality("")
        assert res.is_valid is False
        assert "empty" in res.errors[0].lower()

    def test_no_signal_fails(self):
        """Text with no BUY/SELL/HOLD keyword fails validation."""
        res = validate_decision_quality("This is a decision without a clear signal")
        assert res.is_valid is False
        assert any("signal" in e.lower() for e in res.errors)
        assert res.metrics["signal"] is None

    def test_wrong_type_fails(self):
        """Non-string input (e.g. a dict) is rejected."""
        res = validate_decision_quality({"decision": "BUY"})
        assert res.is_valid is False
        assert "string" in res.errors[0].lower()

    def test_multiple_signals_warning(self):
        """Conflicting keywords keep the first signal but raise a warning."""
        res = validate_decision_quality("BUY or maybe SELL, hard to decide, could HOLD")
        # First occurrence wins...
        assert res.metrics["signal"] == "BUY"
        # ...but the ambiguity is surfaced.
        assert len(res.warnings) > 0
        assert any("conflicting" in w.lower() for w in res.warnings)

    def test_short_decision_warning(self):
        """A bare one-word verdict is valid but flagged as terse."""
        res = validate_decision_quality("BUY")
        assert res.is_valid is True
        assert len(res.warnings) > 0
        assert any("short" in w.lower() for w in res.warnings)

    def test_decision_with_reasoning_markers(self):
        """Colons, periods, and word count all count as reasoning markers."""
        samples = (
            "BUY: Strong fundamentals",
            "SELL. Company is overvalued.",
            "HOLD because market is uncertain",
        )
        for text in samples:
            assert validate_decision_quality(text).metrics["has_reasoning"] is True

    def test_signal_count_metric(self):
        """Every keyword occurrence is tallied in signal_count."""
        res = validate_decision_quality("BUY BUY BUY! Strong signal to buy")
        assert res.metrics["signal_count"] == 4
        assert res.metrics["signal"] == "BUY"
# ============================================================================
# Test Debate State Validation
# ============================================================================
class TestDebateStateValidation:
    """Unit tests for validate_debate_state()."""

    def test_valid_invest_debate_state(self):
        """A fully populated invest debate passes with metrics filled in."""
        state = {
            "history": "Round 1: Bull argues...\nRound 2: Bear counters...",
            "count": 2,
            "judge_decision": "BUY: Bulls made stronger case",
            "bull_history": "Bull argument",
            "bear_history": "Bear argument",
        }
        res = validate_debate_state(state, debate_type="invest")
        assert res.is_valid is True
        assert res.metrics["history_length"] > 0
        assert res.metrics["count"] == 2
        assert res.metrics["judge_signal"] == "BUY"

    def test_valid_risk_debate_state(self):
        """A fully populated risk debate passes."""
        state = {
            "history": "Round 1: Risky argues...\nRound 2: Safe counters...",
            "count": 2,
            "judge_decision": "HOLD: Balanced risk profile",
            "risky_history": "Risky argument",
            "safe_history": "Safe argument",
            "neutral_history": "Neutral argument",
        }
        res = validate_debate_state(state, debate_type="risk")
        assert res.is_valid is True
        assert res.metrics["count"] == 2

    def test_none_debate_state_fails(self):
        """None is rejected outright."""
        res = validate_debate_state(None)
        assert res.is_valid is False
        assert "None" in res.errors[0]

    def test_wrong_type_fails(self):
        """Non-dict input is rejected."""
        res = validate_debate_state("not a dict")
        assert res.is_valid is False
        assert "dict" in res.errors[0].lower()

    def test_missing_required_fields_fails(self):
        """Absent count/judge_decision keys fail validation."""
        partial = {
            "history": "Some history",
            # count and judge_decision deliberately omitted
        }
        res = validate_debate_state(partial)
        assert res.is_valid is False
        assert any("missing" in e.lower() for e in res.errors)

    def test_invalid_debate_type_fails(self):
        """An unrecognized debate_type is rejected."""
        state = {
            "history": "History",
            "count": 1,
            "judge_decision": "BUY",
        }
        res = validate_debate_state(state, debate_type="unknown")
        assert res.is_valid is False
        assert "unknown" in res.errors[0].lower()

    def test_empty_history_warning(self):
        """An empty history string warns but does not fail."""
        state = {
            "history": "",
            "count": 0,
            "judge_decision": "HOLD",
        }
        res = validate_debate_state(state)
        assert res.is_valid is True
        assert any("empty" in w.lower() for w in res.warnings)

    def test_negative_count_fails(self):
        """A negative round count is an error."""
        state = {
            "history": "History",
            "count": -1,
            "judge_decision": "BUY",
        }
        res = validate_debate_state(state)
        assert res.is_valid is False
        assert any("negative" in e.lower() for e in res.errors)

    def test_high_count_warning(self):
        """An unusually large round count triggers a warning."""
        state = {
            "history": "Long debate...",
            "count": 15,
            "judge_decision": "SELL",
        }
        res = validate_debate_state(state)
        assert res.is_valid is True
        assert any("high" in w.lower() for w in res.warnings)

    def test_invalid_judge_decision_warning(self):
        """A judge decision without a signal warns but does not fail."""
        state = {
            "history": "History",
            "count": 2,
            "judge_decision": "No clear signal here",
        }
        res = validate_debate_state(state)
        assert res.is_valid is True
        assert len(res.warnings) > 0

    def test_optional_fields_metric(self):
        """Optional side-history fields are tallied in metrics."""
        state = {
            "history": "History",
            "count": 1,
            "judge_decision": "BUY",
            "bull_history": "Bull",
            "bear_history": "Bear",
        }
        res = validate_debate_state(state, debate_type="invest")
        assert res.metrics["optional_fields_present"] >= 2

    def test_wrong_history_type_fails(self):
        """A non-string history value is an error."""
        state = {
            "history": 123,
            "count": 1,
            "judge_decision": "BUY",
        }
        res = validate_debate_state(state)
        assert res.is_valid is False
        assert any("string" in e.lower() for e in res.errors)

    def test_wrong_count_type_fails(self):
        """A non-integer count value is an error."""
        state = {
            "history": "History",
            "count": "two",
            "judge_decision": "BUY",
        }
        res = validate_debate_state(state)
        assert res.is_valid is False
        assert any("int" in e.lower() for e in res.errors)
# ============================================================================
# Test Agent State Validation
# ============================================================================
class TestAgentStateValidation:
    """Unit tests for validate_agent_state()."""

    def test_valid_complete_agent_state(self):
        """A fully populated state validates with all metrics recorded."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "market_report": "# Market Analysis\n\n" + "Detailed analysis. " * 100,
            "sentiment_report": "# Sentiment Report\n\n" + "Social sentiment. " * 100,
            "news_report": "# News Report\n\n" + "Latest news. " * 100,
            "fundamentals_report": "# Fundamentals\n\n" + "Financial data. " * 100,
            "investment_debate_state": {
                "history": "Debate history",
                "count": 3,
                "judge_decision": "BUY: Strong case",
            },
            "risk_debate_state": {
                "history": "Risk debate",
                "count": 2,
                "judge_decision": "HOLD: Moderate risk",
            },
            "final_trade_decision": "BUY: All signals align positively",
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert res.metrics["company_of_interest"] == "AAPL"
        assert res.metrics["trade_date"] == "2024-01-15"
        assert res.metrics["reports_present"] == 4
        assert res.metrics["final_signal"] == "BUY"

    def test_none_state_fails(self):
        """None is rejected outright."""
        res = validate_agent_state(None)
        assert res.is_valid is False
        assert "None" in res.errors[0]

    def test_wrong_type_fails(self):
        """Non-dict input is rejected."""
        res = validate_agent_state("not a dict")
        assert res.is_valid is False
        assert "dict" in res.errors[0].lower()

    def test_missing_company_fails(self):
        """Absent company_of_interest is an error."""
        res = validate_agent_state({"trade_date": "2024-01-15"})
        assert res.is_valid is False
        assert any("company" in e.lower() for e in res.errors)

    def test_missing_trade_date_fails(self):
        """Absent trade_date is an error."""
        res = validate_agent_state({"company_of_interest": "AAPL"})
        assert res.is_valid is False
        assert any("trade_date" in e.lower() for e in res.errors)

    def test_incomplete_reports_warning(self):
        """Missing reports produce warnings, not errors."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "market_report": "Market analysis. " * 100,
            # remaining three reports deliberately absent
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert len(res.warnings) > 0
        assert res.metrics["reports_present"] < 4

    def test_invalid_report_warning(self):
        """A below-minimum-length report is surfaced as a warning."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "market_report": "Too short",
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("market_report" in w.lower() for w in res.warnings)

    def test_invalid_invest_debate_warning(self):
        """A malformed investment debate is surfaced as a warning."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "investment_debate_state": {
                # count and judge_decision deliberately absent
                "history": "History",
            },
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("investment debate" in w.lower() for w in res.warnings)

    def test_invalid_risk_debate_warning(self):
        """A malformed risk debate is surfaced as a warning."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "risk_debate_state": {
                "count": -1,  # invalid on purpose
            },
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("risk debate" in w.lower() for w in res.warnings)

    def test_invalid_final_decision_warning(self):
        """A final decision without a signal is surfaced as a warning."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "final_trade_decision": "No clear signal",
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("final decision" in w.lower() for w in res.warnings)

    def test_incomplete_state_warning(self):
        """A state with no debates or decision warns about incompleteness."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("incomplete" in w.lower() for w in res.warnings)

    def test_reports_count_metrics(self):
        """Report coverage metrics reflect exactly what is present."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "market_report": "Report. " * 100,
            "sentiment_report": "Report. " * 100,
        }
        res = validate_agent_state(state)
        assert res.metrics["reports_present"] == 2
        assert res.metrics["total_reports_expected"] == 4

View File

@ -0,0 +1,453 @@
"""
Output validation utilities for agent outputs.
This module provides validation functions for:
- Report completeness (length, structure, markdown formatting)
- Decision quality (signal extraction, reasoning clarity)
- Debate state coherence (history tracking, judge decisions)
- Complete agent state validation
All validators return ValidationResult with actionable feedback.
"""
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
import re
@dataclass
class ValidationResult:
    """
    Outcome of a single validation pass.

    Attributes:
        is_valid: Overall pass/fail flag; flipped to False by add_error().
        errors: Hard failures that made (or will make) the result invalid.
        warnings: Soft quality concerns; these never affect is_valid.
        metrics: Measured values (lengths, counts, extracted signals, ...).
    """

    is_valid: bool
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    metrics: Dict[str, Any] = field(default_factory=dict)

    def add_error(self, message: str) -> None:
        """Record a hard failure; the result becomes invalid."""
        self.is_valid = False
        self.errors.append(message)

    def add_warning(self, message: str) -> None:
        """Record a soft concern without failing the result."""
        self.warnings.append(message)

    def add_metric(self, key: str, value: Any) -> None:
        """Store a measured value under *key*."""
        self.metrics[key] = value
def validate_report_completeness(
    report: Optional[str],
    min_length: int = 500,
    require_markdown_tables: bool = False,
    require_sections: bool = False,
) -> ValidationResult:
    """
    Check that a report is present, long enough, and well-structured.

    Args:
        report: The report text to validate.
        min_length: Minimum character count required (default: 500).
        require_markdown_tables: Fail if no markdown table rows are found.
        require_sections: Fail if no '#'-style section headers are found.

    Returns:
        ValidationResult carrying errors, warnings, and measured metrics
        (length, markdown_tables, section_headers, has_bullet_points).

    Example:
        >>> result = validate_report_completeness("# Report\\n\\nThis is too short")
        >>> assert not result.is_valid
        >>> assert "minimum" in result.errors[0].lower()
    """
    result = ValidationResult(is_valid=True)

    # Type/presence guards — nothing else is measurable without a string.
    if report is None:
        result.add_error("Report is None")
        return result
    if not isinstance(report, str):
        result.add_error(f"Report must be string, got {type(report).__name__}")
        return result

    text_length = len(report.strip())
    result.add_metric("length", text_length)
    if text_length == 0:
        result.add_error("Report is empty")
        return result
    if text_length < min_length:
        result.add_error(
            f"Report length ({text_length}) below minimum ({min_length})"
        )

    # Rows containing at least two pipes are treated as markdown table rows.
    table_rows = re.findall(r'\|.*\|', report)
    result.add_metric("markdown_tables", len(table_rows))
    if require_markdown_tables and not table_rows:
        result.add_error("Report missing required markdown tables")

    # Headers may be indented, so allow optional leading whitespace.
    headers = re.findall(r'^\s*#{1,6}\s+.+$', report, re.MULTILINE)
    result.add_metric("section_headers", len(headers))
    if require_sections and not headers:
        result.add_error("Report missing required section headers")

    # Soft quality signal: anything under 1.5x the minimum is "short".
    if text_length < min_length * 1.5:
        result.add_warning(
            f"Report is relatively short ({text_length} chars). "
            f"Consider adding more detail."
        )

    bullets_found = re.search(r'^\s*[-*]\s+', report, re.MULTILINE) is not None
    result.add_metric("has_bullet_points", bullets_found)
    if not bullets_found and not table_rows:
        result.add_warning("Report lacks structured content (no bullets or tables)")

    return result
def validate_decision_quality(decision: Optional[str]) -> ValidationResult:
    """
    Check a trading decision for a clear signal and supporting reasoning.

    The first BUY/SELL/HOLD keyword (case-insensitive) is extracted as the
    signal; conflicting keywords and terse decisions only raise warnings.

    Args:
        decision: The decision text to validate.

    Returns:
        ValidationResult with metrics: length, signal, signal_count,
        has_reasoning.

    Example:
        >>> result = validate_decision_quality("BUY: Strong fundamentals")
        >>> assert result.is_valid
        >>> assert result.metrics["signal"] == "BUY"
    """
    result = ValidationResult(is_valid=True)

    # Type/presence guards.
    if decision is None:
        result.add_error("Decision is None")
        return result
    if not isinstance(decision, str):
        result.add_error(f"Decision must be string, got {type(decision).__name__}")
        return result

    text = decision.strip()
    if not text:
        result.add_error("Decision is empty")
        return result
    result.add_metric("length", len(text))

    # Word-boundary match so e.g. "rebuy" does not count as a signal.
    hits = re.findall(r'\b(BUY|SELL|HOLD)\b', text, re.IGNORECASE)
    if not hits:
        result.add_error(
            "No clear trading signal found (expected BUY, SELL, or HOLD)"
        )
        result.add_metric("signal", None)
        return result

    primary = hits[0].upper()
    result.add_metric("signal", primary)
    result.add_metric("signal_count", len(hits))

    distinct = {h.upper() for h in hits}
    if len(distinct) > 1:
        result.add_warning(
            f"Multiple conflicting signals found: {distinct}. "
            f"Using first occurrence: {primary}"
        )

    # Any delimiter or a sentence-length decision counts as reasoning.
    reasoned = (':' in text) or ('.' in text) or (len(text.split()) >= 5)
    result.add_metric("has_reasoning", reasoned)
    if not reasoned:
        result.add_warning("Decision lacks clear reasoning or explanation")

    if len(text) < 20:
        result.add_warning(
            f"Decision is very short ({len(text)} chars). "
            f"Consider adding more rationale."
        )

    return result
def validate_debate_state(
    debate_state: Optional[Dict[str, Any]],
    debate_type: str = "invest",
) -> ValidationResult:
    """
    Check a debate-state dict for structure and coherence.

    Requires history, count, and judge_decision keys; a None value for any
    of them is tolerated (e.g. a debate that has not concluded yet), but a
    present value must have the right type and range.

    Args:
        debate_state: The debate state dictionary to validate.
        debate_type: Either "invest" or "risk"; selects the optional fields.

    Returns:
        ValidationResult with metrics: history_length, count, judge_signal,
        optional_fields_present.

    Example:
        >>> state = {"history": "Round 1...", "count": 1, "judge_decision": "BUY"}
        >>> result = validate_debate_state(state)
        >>> assert result.is_valid
    """
    result = ValidationResult(is_valid=True)

    # Type/presence guards.
    if debate_state is None:
        result.add_error("Debate state is None")
        return result
    if not isinstance(debate_state, dict):
        result.add_error(
            f"Debate state must be dict, got {type(debate_state).__name__}"
        )
        return result

    # Both debate types share the same required core; only the optional
    # side-history fields differ.
    optional_by_type = {
        "invest": ["bull_history", "bear_history", "current_response"],
        "risk": [
            "risky_history",
            "safe_history",
            "neutral_history",
            "latest_speaker",
            "current_risky_response",
            "current_safe_response",
            "current_neutral_response",
        ],
    }
    if debate_type not in optional_by_type:
        result.add_error(f"Unknown debate type: {debate_type}")
        return result
    required_fields = ["history", "count", "judge_decision"]
    optional_fields = optional_by_type[debate_type]

    missing_fields = [f for f in required_fields if f not in debate_state]
    if missing_fields:
        result.add_error(f"Missing required fields: {missing_fields}")
        return result

    history = debate_state.get("history")
    if history is not None:
        if not isinstance(history, str):
            result.add_error(
                f"History must be string, got {type(history).__name__}"
            )
        elif not history.strip():
            result.add_warning("History is empty")
        else:
            result.add_metric("history_length", len(history))

    count = debate_state.get("count")
    if count is not None:
        if not isinstance(count, int):
            result.add_error(f"Count must be int, got {type(count).__name__}")
        elif count < 0:
            result.add_error(f"Count cannot be negative: {count}")
        else:
            result.add_metric("count", count)
            # A runaway round count usually means the debate never converged.
            if count > 10:
                result.add_warning(
                    f"Debate count is very high ({count}). "
                    f"May indicate convergence issues."
                )

    judge_decision = debate_state.get("judge_decision")
    if judge_decision is not None:
        if not isinstance(judge_decision, str):
            result.add_error(
                f"Judge decision must be string, got {type(judge_decision).__name__}"
            )
        elif not judge_decision.strip():
            result.add_warning("Judge decision is empty")
        else:
            # A weak judge decision degrades quality but does not invalidate
            # the debate structure itself.
            verdict = validate_decision_quality(judge_decision)
            if verdict.is_valid:
                result.add_metric("judge_signal", verdict.metrics.get("signal"))
            else:
                result.add_warning(
                    f"Judge decision has quality issues: "
                    f"{', '.join(verdict.errors)}"
                )

    result.add_metric(
        "optional_fields_present",
        sum(1 for f in optional_fields if f in debate_state),
    )
    return result
def _validate_identity_fields(state: Dict[str, Any], result: ValidationResult) -> None:
    """Require company_of_interest and trade_date; record them as metrics."""
    company = state.get("company_of_interest")
    if not company:
        result.add_error("Missing company_of_interest")
    else:
        result.add_metric("company_of_interest", company)

    trade_date = state.get("trade_date")
    if not trade_date:
        result.add_error("Missing trade_date")
    else:
        result.add_metric("trade_date", trade_date)


def _validate_reports(state: Dict[str, Any], result: ValidationResult) -> None:
    """Validate each analyst report that is present; track coverage metrics."""
    report_fields = [
        "market_report",
        "sentiment_report",
        "news_report",
        "fundamentals_report",
    ]
    reports_present = 0
    for report_field in report_fields:
        report = state.get(report_field)
        if not report:
            continue
        reports_present += 1
        # Report problems degrade quality but do not invalidate the state.
        report_result = validate_report_completeness(
            report,
            min_length=500,
            require_markdown_tables=False,
            require_sections=False,
        )
        if not report_result.is_valid:
            result.add_warning(
                f"{report_field} has issues: {', '.join(report_result.errors)}"
            )

    result.add_metric("reports_present", reports_present)
    result.add_metric("total_reports_expected", len(report_fields))
    if reports_present < len(report_fields):
        result.add_warning(
            f"Only {reports_present}/{len(report_fields)} reports present"
        )


def _validate_debates(state: Dict[str, Any], result: ValidationResult) -> bool:
    """Validate both debate states; return True if at least one is present."""
    invest_debate = state.get("investment_debate_state")
    if invest_debate:
        invest_result = validate_debate_state(invest_debate, debate_type="invest")
        if not invest_result.is_valid:
            result.add_warning(
                f"Investment debate has issues: {', '.join(invest_result.errors)}"
            )
        result.add_metric("investment_debate_valid", invest_result.is_valid)

    risk_debate = state.get("risk_debate_state")
    if risk_debate:
        risk_result = validate_debate_state(risk_debate, debate_type="risk")
        if not risk_result.is_valid:
            result.add_warning(
                f"Risk debate has issues: {', '.join(risk_result.errors)}"
            )
        result.add_metric("risk_debate_valid", risk_result.is_valid)

    return bool(invest_debate) or bool(risk_debate)


def _validate_final_decision(state: Dict[str, Any], result: ValidationResult) -> None:
    """Validate the final trade decision, recording its extracted signal."""
    final_decision = state.get("final_trade_decision")
    if not final_decision:
        return
    decision_result = validate_decision_quality(final_decision)
    if decision_result.is_valid:
        result.add_metric("final_signal", decision_result.metrics.get("signal"))
    else:
        result.add_warning(
            f"Final decision has issues: {', '.join(decision_result.errors)}"
        )


def validate_agent_state(state: Optional[Dict[str, Any]]) -> ValidationResult:
    """
    Validate a complete agent state by orchestrating all sub-validators.

    Checks, in order: identity fields (company, trade date), the four analyst
    reports, both debate states, and the final trade decision. Only missing
    identity fields (or a non-dict state) make the result invalid; everything
    else is reported as warnings so partially built states can be inspected.

    Args:
        state: The complete agent state dictionary.

    Returns:
        ValidationResult with aggregate metrics (reports_present,
        total_reports_expected, debate validity flags, final_signal, ...).

    Example:
        >>> state = {
        ...     "company_of_interest": "AAPL",
        ...     "trade_date": "2024-01-15",
        ...     "market_report": "Market analysis..." * 100,
        ... }
        >>> result = validate_agent_state(state)
        >>> assert "company_of_interest" in result.metrics
    """
    result = ValidationResult(is_valid=True)

    # Type/presence guards.
    if state is None:
        result.add_error("Agent state is None")
        return result
    if not isinstance(state, dict):
        result.add_error(f"Agent state must be dict, got {type(state).__name__}")
        return result

    _validate_identity_fields(state, result)
    _validate_reports(state, result)
    debates_present = _validate_debates(state, result)
    _validate_final_decision(state, result)

    # Emitted last so warning order matches the per-section checks above.
    if not debates_present:
        result.add_warning(
            "State appears incomplete: no debate states present"
        )
    return result