feat(tests): add UAT and evaluation tests for agent outputs - Fixes #53

- Created tradingagents/utils/output_validator.py with ValidationResult dataclass

- Added validate_report_completeness(), validate_decision_quality() for content validation

- Added validate_debate_state(), validate_agent_state() for state coherence

- Created tests/unit/test_output_validators.py with 54 unit tests

- Created tests/e2e/test_uat_agent_outputs.py with 23 UAT scenarios

- Added agent state fixtures to tests/conftest.py (sample_agent_state, debates)

- Total: 77 tests covering report quality, signal extraction, and state integrity

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Andrew Kaszubski 2025-12-26 11:38:37 +11:00
parent b4653ca37b
commit e5575250df
4 changed files with 1916 additions and 0 deletions

View File

@@ -383,3 +383,271 @@ def openrouter_config():
"backend_url": "https://openrouter.ai/api/v1",
})
return config
# ============================================================================
# Agent Output Validation Fixtures (Issue #53)
# ============================================================================
@pytest.fixture
def sample_agent_state() -> dict:
    """
    Create a complete sample agent state for testing.

    Provides a fully populated agent state with all required fields
    including reports, debate states, and final decision.

    Scope: function (default)

    Returns:
        dict: Complete agent state with all fields populated

    Example:
        def test_complete_state(sample_agent_state):
            assert sample_agent_state["company_of_interest"] == "AAPL"
            assert "market_report" in sample_agent_state
    """
    # Each report is padded with a repeated filler sentence so its length
    # clears the validators' minimum-length checks (tests call
    # validate_report_completeness with min_length=500); the markdown
    # headers/tables give the structure the validators look for.
    return {
        "company_of_interest": "AAPL",
        "trade_date": "2024-01-15",
        "market_report": """
# Market Analysis for AAPL
## Technical Indicators
Strong bullish momentum with RSI at 55 and MACD showing positive divergence.
Price has broken through key resistance at $175.
## Volume Analysis
Above-average volume on recent upward moves indicates strong buyer interest.
Institutional accumulation pattern observed over the past 2 weeks.
## Price Action
Clear higher highs and higher lows pattern establishing uptrend.
Support level established at $170 with strong buying pressure.
""" + "Additional detailed analysis. " * 30,
        "sentiment_report": """
# Social Media Sentiment Analysis
## Overall Sentiment
Strongly positive sentiment across major platforms (Twitter, Reddit, StockTwits).
Sentiment score: 8.5/10 based on 10,000+ analyzed posts.
## Key Themes
- New product launch excitement
- Strong quarterly earnings anticipation
- Innovation leadership recognition
## Influencer Activity
Major tech influencers bullish on near-term prospects.
""" + "More sentiment details. " * 30,
        "news_report": """
# News Analysis
## Recent Headlines
- Major product announcement driving positive coverage
- Analyst upgrades from 3 top firms this week
- Partnership announcements in AI space
## Coverage Tone
85% positive, 10% neutral, 5% negative across 50 major news sources.
## Impact Assessment
News flow strongly supportive of bullish thesis.
""" + "Additional news analysis. " * 30,
        "fundamentals_report": """
# Fundamental Analysis
## Financial Metrics
| Metric | Value | Industry Avg |
|--------|-------|--------------|
| P/E | 28 | 25 |
| ROE | 45% | 20% |
| Revenue Growth | 12% | 8% |
## Balance Sheet
Strong cash position of $150B, low debt-to-equity ratio.
## Earnings Quality
Consistent earnings growth with strong margins.
""" + "Detailed fundamental analysis. " * 30,
        # Three-round investment debate; judge_decision leads with the
        # signal token ("BUY: ...") that downstream signal extraction reads.
        "investment_debate_state": {
            "history": "Round 1: Bull presents case for strong buy...\nRound 2: Bear raises concerns about valuation...\nRound 3: Bull counters with growth prospects...",
            "count": 3,
            "judge_decision": "BUY: Bulls made a compelling case with strong fundamentals and positive momentum",
            "bull_history": "Strong fundamentals, positive momentum, innovation leadership",
            "bear_history": "Slight valuation concerns, market volatility risk",
            "current_response": "Final recommendation is BUY",
        },
        # Two-round risk debate with per-analyst histories and responses.
        "risk_debate_state": {
            "history": "Round 1: Risk assessment begins...\nRound 2: Conservative view presented...",
            "count": 2,
            "judge_decision": "BUY: Risk is acceptable given strong fundamentals",
            "risky_history": "High potential upside justifies position",
            "safe_history": "Proceed with caution, good fundamentals",
            "neutral_history": "Balanced risk-reward at current levels",
            "latest_speaker": "neutral",
            "current_risky_response": "Strong buy",
            "current_safe_response": "Moderate buy",
            "current_neutral_response": "Buy with standard position sizing",
        },
        "final_trade_decision": "BUY: Strong consensus across all analysis teams. Fundamentals solid, technicals bullish, sentiment positive. Entry at current levels recommended with standard position sizing.",
        "investment_plan": "Initiate position with 2% portfolio allocation",
        "trader_investment_plan": "Execute market order for calculated position size",
        "sender": "trader",
    }
@pytest.fixture
def sample_agent_state_buy(sample_agent_state) -> dict:
    """
    Sample agent state with BUY decision.

    Returns a deep copy of the complete BUY-scenario state so that a test
    mutating this fixture cannot leak changes into ``sample_agent_state``
    when both fixtures are requested by the same test.

    Scope: function (default)

    Returns:
        dict: Complete agent state whose final_trade_decision is BUY

    Example:
        def test_buy_scenario(sample_agent_state_buy):
            assert "BUY" in sample_agent_state_buy["final_trade_decision"]
    """
    # Previously this fixture returned the *same* dict object as
    # sample_agent_state; a deep copy guarantees test isolation without
    # changing any of the content assertions made by callers.
    import copy

    return copy.deepcopy(sample_agent_state)
@pytest.fixture
def sample_agent_state_sell() -> dict:
    """
    Sample agent state with SELL decision.

    Provides a complete state where all analyses point to SELL.

    Scope: function (default)

    Returns:
        dict: Agent state with SELL decision

    Example:
        def test_sell_scenario(sample_agent_state_sell):
            assert "SELL" in sample_agent_state_sell["final_trade_decision"]
    """
    # Reports are padded with repeated filler so they exceed the validators'
    # minimum-length threshold; every judge decision and the final decision
    # lead with "SELL" so signal extraction resolves to SELL.
    return {
        "company_of_interest": "TSLA",
        "trade_date": "2024-01-20",
        "market_report": "# Market Analysis\n\nBearish technical pattern with breakdown below support. " + "Detailed analysis. " * 50,
        "sentiment_report": "# Sentiment Analysis\n\nNegative sentiment prevailing across platforms. " + "More details. " * 50,
        "news_report": "# News Report\n\nMultiple negative headlines and analyst downgrades. " + "Additional coverage. " * 50,
        "fundamentals_report": "# Fundamentals\n\nDeteriorating metrics and earnings concerns. " + "Financial details. " * 50,
        # Two-round investment debate won by the bear case.
        "investment_debate_state": {
            "history": "Round 1: Bear presents strong sell case...\nRound 2: Bull unable to counter effectively...",
            "count": 2,
            "judge_decision": "SELL: Bears made compelling case with fundamental concerns",
            "bull_history": "Limited upside potential",
            "bear_history": "Strong downside risk, overvalued",
        },
        # Single-round risk debate recommending exit.
        "risk_debate_state": {
            "history": "Round 1: Risk analysis shows high downside...",
            "count": 1,
            "judge_decision": "SELL: Exit position to preserve capital",
            "risky_history": "Too risky, exit recommended",
            "safe_history": "Definitely sell",
            "neutral_history": "Sell is prudent",
        },
        "final_trade_decision": "SELL: Consensus to exit position. Fundamentals weak, technicals bearish, sentiment negative.",
    }
@pytest.fixture
def sample_agent_state_hold() -> dict:
    """
    Sample agent state with HOLD decision.

    Provides a complete state where analyses are mixed, leading to HOLD.

    Scope: function (default)

    Returns:
        dict: Agent state with HOLD decision

    Example:
        def test_hold_scenario(sample_agent_state_hold):
            assert "HOLD" in sample_agent_state_hold["final_trade_decision"]
    """
    # Mixed/neutral narratives throughout; the judge decisions and the final
    # decision all lead with "HOLD" so signal extraction resolves to HOLD.
    return {
        "company_of_interest": "GOOGL",
        "trade_date": "2024-01-22",
        "market_report": "# Market Analysis\n\nMixed signals with consolidation pattern. " + "Technical details. " * 50,
        "sentiment_report": "# Sentiment Analysis\n\nNeutral sentiment, market awaiting catalyst. " + "Sentiment data. " * 50,
        "news_report": "# News Report\n\nBalanced news flow, no major catalysts. " + "News details. " * 50,
        "fundamentals_report": "# Fundamentals\n\nSolid but not compelling, fairly valued. " + "Financial data. " * 50,
        # Three inconclusive investment-debate rounds.
        "investment_debate_state": {
            "history": "Round 1: Bull and Bear present balanced views...\nRound 2: No clear winner...\nRound 3: Continued debate...",
            "count": 3,
            "judge_decision": "HOLD: Insufficient conviction either way, maintain position",
            "bull_history": "Some positives but not strong",
            "bear_history": "Some concerns but not severe",
        },
        # Single balanced risk-debate round.
        "risk_debate_state": {
            "history": "Round 1: Risk assessment shows balanced profile...",
            "count": 1,
            "judge_decision": "HOLD: Risk-reward balanced, no action needed",
            "risky_history": "Could go either way",
            "safe_history": "Wait for clarity",
            "neutral_history": "Hold is appropriate",
        },
        "final_trade_decision": "HOLD: Mixed signals across analysis teams. Await further clarity before making move.",
    }
@pytest.fixture
def sample_invest_debate() -> dict:
    """
    Sample investment debate state.

    Provides a complete investment debate state for isolated testing.

    Scope: function (default)

    Returns:
        dict: Investment debate state (InvestDebateState)

    Example:
        def test_debate(sample_invest_debate):
            assert sample_invest_debate["count"] > 0
    """
    return {
        # Three newline-separated rounds; "count" mirrors the round count.
        "history": "Round 1: Bull argues for strong buy based on fundamentals...\nRound 2: Bear raises valuation concerns...\nRound 3: Bull counters with growth prospects...",
        "count": 3,
        # judge_decision leads with the signal token that extraction reads.
        "judge_decision": "BUY: Bulls presented stronger evidence",
        "bull_history": "Strong fundamentals, positive technicals, good sentiment",
        "bear_history": "Valuation slightly stretched, some market risk",
        "current_response": "Recommend BUY with conviction",
    }
@pytest.fixture
def sample_risk_debate() -> dict:
    """
    Sample risk debate state.

    Provides a complete risk debate state for isolated testing.

    Scope: function (default)

    Returns:
        dict: Risk debate state (RiskDebateState)

    Example:
        def test_risk_debate(sample_risk_debate):
            assert sample_risk_debate["count"] > 0
    """
    return {
        # Two newline-separated rounds; "count" mirrors the round count.
        "history": "Round 1: Risk analysts evaluate position sizing...\nRound 2: Discussion on risk parameters...",
        "count": 2,
        # judge_decision leads with the signal token that extraction reads.
        "judge_decision": "BUY: Risk acceptable with standard position size",
        # One history entry per risk persona (risky / safe / neutral).
        "risky_history": "Aggressive position justified by strong signals",
        "safe_history": "Conservative position appropriate given uncertainty",
        "neutral_history": "Standard position sizing recommended",
        "latest_speaker": "neutral",
        "current_risky_response": "Take larger position",
        "current_safe_response": "Take smaller position",
        "current_neutral_response": "Standard position is balanced",
    }

View File

@@ -0,0 +1,495 @@
"""
UAT (User Acceptance Testing) for Agent Output Quality.
This module provides end-to-end tests for complete agent workflows:
1. Complete analysis workflow (BUY/SELL/HOLD scenarios)
2. Edge case handling (missing data, conflicting reports)
3. Content quality validation (length, structure, clarity)
4. State integrity checks (field presence, debate coherence)
All tests use mocked data to avoid real API calls.
"""
import pytest
from typing import Dict, Any
from tradingagents.utils.output_validator import (
validate_agent_state,
validate_decision_quality,
validate_debate_state,
validate_report_completeness,
)
pytestmark = pytest.mark.e2e
# ============================================================================
# Test Complete Analysis Workflow
# ============================================================================
class TestCompleteAnalysisWorkflow:
    """End-to-end checks of the full agent analysis workflow per scenario."""

    def test_buy_scenario_complete_workflow(self, sample_agent_state_buy):
        """
        Validate the complete BUY scenario.

        All four reports must be present, both debates must validate, and
        the extracted final signal must be BUY for AAPL.
        """
        outcome = validate_agent_state(sample_agent_state_buy)
        assert outcome.is_valid is True
        assert outcome.metrics["company_of_interest"] == "AAPL"
        assert outcome.metrics["reports_present"] == 4
        assert outcome.metrics["final_signal"] == "BUY"
        assert outcome.metrics["investment_debate_valid"] is True
        assert outcome.metrics["risk_debate_valid"] is True

    def test_sell_scenario_complete_workflow(self, sample_agent_state_sell):
        """
        Validate the complete SELL scenario: all reports present and the
        extracted final signal is SELL.
        """
        outcome = validate_agent_state(sample_agent_state_sell)
        assert outcome.is_valid is True
        assert outcome.metrics["final_signal"] == "SELL"
        assert outcome.metrics["reports_present"] == 4

    def test_hold_scenario_complete_workflow(self, sample_agent_state_hold):
        """Validate that the mixed-signal scenario resolves to a HOLD signal."""
        outcome = validate_agent_state(sample_agent_state_hold)
        assert outcome.is_valid is True
        assert outcome.metrics["final_signal"] == "HOLD"

    def test_workflow_preserves_debate_history(self, sample_agent_state_buy):
        """Both debate states must carry non-empty history and round counts."""
        debates = {
            "invest": sample_agent_state_buy["investment_debate_state"],
            "risk": sample_agent_state_buy["risk_debate_state"],
        }
        verdicts = {
            kind: validate_debate_state(debate, debate_type=kind)
            for kind, debate in debates.items()
        }
        assert verdicts["invest"].metrics["history_length"] > 0
        assert verdicts["risk"].metrics["history_length"] > 0
        assert verdicts["invest"].metrics["count"] > 0
        assert verdicts["risk"].metrics["count"] > 0

    def test_workflow_all_reports_meet_quality_standards(self, sample_agent_state_buy):
        """Every generated report must clear the baseline quality bar."""
        report_keys = (
            "market_report",
            "sentiment_report",
            "news_report",
            "fundamentals_report",
        )
        for key in report_keys:
            verdict = validate_report_completeness(
                sample_agent_state_buy[key],
                min_length=500,
                require_markdown_tables=False,
                require_sections=False,
            )
            assert verdict.is_valid is True
            assert verdict.metrics["length"] >= 500
# ============================================================================
# Test Edge Case Scenarios
# ============================================================================
class TestEdgeCaseScenarios:
    """Test handling of edge cases and unusual scenarios."""

    def test_missing_single_report_graceful_degradation(self):
        """Test that workflow continues with one missing report."""
        state = {
            "company_of_interest": "TSLA",
            "trade_date": "2024-01-20",
            "market_report": "Market analysis. " * 100,
            "sentiment_report": "Sentiment analysis. " * 100,
            "news_report": "News analysis. " * 100,
            # Missing fundamentals_report
            "investment_debate_state": {
                "history": "Debate based on available data",
                "count": 3,
                "judge_decision": "HOLD: Incomplete data, proceeding cautiously",
            },
            "risk_debate_state": {
                "history": "Risk assessment",
                "count": 2,
                "judge_decision": "HOLD: Missing fundamentals increases uncertainty",
            },
            "final_trade_decision": "HOLD: Awaiting fundamental data",
        }
        result = validate_agent_state(state)
        # Should still be valid but with warnings
        assert result.is_valid is True
        assert result.metrics["reports_present"] == 3
        assert len(result.warnings) > 0

    def test_conflicting_debate_conclusions_warning(self):
        """Test warning when investment and risk debates conflict."""
        state = {
            "company_of_interest": "GOOGL",
            "trade_date": "2024-01-22",
            "market_report": "Report. " * 100,
            "sentiment_report": "Report. " * 100,
            "news_report": "Report. " * 100,
            "fundamentals_report": "Report. " * 100,
            "investment_debate_state": {
                "history": "Bullish debate",
                "count": 2,
                "judge_decision": "BUY: Strong upside potential",
            },
            "risk_debate_state": {
                "history": "Risk concerns",
                "count": 2,
                "judge_decision": "SELL: Risk too high",  # Conflicts with invest
            },
            "final_trade_decision": "HOLD: Conflicting signals from teams",
        }
        result = validate_agent_state(state)
        assert result.is_valid is True
        # Different signals detected
        assert result.metrics.get("final_signal") == "HOLD"

    def test_empty_debate_history_but_valid_decision(self):
        """Test handling of empty debate history with valid decision."""
        state = {
            "company_of_interest": "MSFT",
            "trade_date": "2024-01-25",
            "market_report": "Report. " * 100,
            "investment_debate_state": {
                "history": "",  # Empty history
                "count": 0,
                "judge_decision": "HOLD: Insufficient deliberation",
            },
            "final_trade_decision": "HOLD: More analysis needed",
        }
        result = validate_agent_state(state)
        assert result.is_valid is True
        assert len(result.warnings) > 0  # Should warn about empty history

    def test_very_long_debate_convergence_issue(self):
        """Test detection of debates that went too long."""
        state = {
            "company_of_interest": "NVDA",
            "trade_date": "2024-01-28",
            "market_report": "Report. " * 100,
            "investment_debate_state": {
                "history": "Round 1...\nRound 2...\n" * 15,
                "count": 15,  # Very high count
                "judge_decision": "BUY: Finally reached consensus",
            },
            "final_trade_decision": "BUY: After extensive deliberation",
        }
        result = validate_agent_state(state)
        assert result.is_valid is True
        # Should have warnings about high debate count
        invest_debate_result = validate_debate_state(
            state["investment_debate_state"],
            debate_type="invest"
        )
        assert len(invest_debate_result.warnings) > 0

    def test_malformed_but_extractable_decision(self):
        """Test extraction of signal from poorly formatted decision."""
        # None of these lead with a clean "BUY:" prefix, yet the signal
        # should still be extracted case-insensitively from each.
        decisions = [
            "i think we should BUY this stock",
            "recommendation: buy",
            "buy!!!",
            "Final call is to buy the position",
        ]
        for decision in decisions:
            result = validate_decision_quality(decision)
            assert result.metrics["signal"] == "BUY"

    def test_missing_all_debate_states(self):
        """Test handling when no debates occurred."""
        state = {
            "company_of_interest": "META",
            "trade_date": "2024-02-01",
            "market_report": "Report. " * 100,
            # No debate states
            "final_trade_decision": "HOLD: No consensus reached",
        }
        result = validate_agent_state(state)
        assert result.is_valid is True
        assert len(result.warnings) > 0
        # Missing debates should be reported as an "incomplete" warning.
        assert any("incomplete" in w.lower() for w in result.warnings)
# ============================================================================
# Test Content Quality
# ============================================================================
class TestContentQuality:
    """Test content quality validation across all outputs."""

    def test_report_minimum_length_enforcement(self):
        """Test that all reports meet minimum length requirements."""
        # All of these are far below the 500-character minimum.
        short_reports = [
            "Too short",
            "Also short",
            "Brief",
        ]
        for report in short_reports:
            result = validate_report_completeness(report, min_length=500)
            assert result.is_valid is False

    def test_report_markdown_structure_quality(self):
        """Test that well-structured reports are recognized."""
        # Contains headers, a markdown table, and bullet points so that all
        # structural metrics are exercised at once.
        well_structured_report = """
# Market Analysis for AAPL
## Executive Summary
Strong buy signal based on comprehensive analysis.
## Technical Indicators
| Indicator | Value | Signal |
|-----------|-------|--------|
| RSI | 45 | Neutral|
| MACD | +2.3 | Buy |
## Fundamental Analysis
- Revenue growth: 15% YoY
- P/E ratio: 25 (reasonable for tech)
- Strong balance sheet
## Conclusion
""" + "Detailed conclusion. " * 50
        result = validate_report_completeness(
            well_structured_report,
            min_length=500,
            require_markdown_tables=True,
            require_sections=True,
        )
        assert result.is_valid is True
        assert result.metrics["markdown_tables"] > 0
        assert result.metrics["section_headers"] >= 3
        assert result.metrics["has_bullet_points"] is True

    def test_decision_clarity_with_reasoning(self):
        """Test that clear decisions with reasoning are validated."""
        clear_decisions = [
            "BUY: Strong fundamentals (P/E 20), positive momentum (RSI 55), bullish sentiment",
            "SELL: Overvalued at current P/E of 45, declining revenue, negative news",
            "HOLD: Mixed signals - good fundamentals but uncertain market conditions",
        ]
        for decision in clear_decisions:
            result = validate_decision_quality(decision)
            assert result.is_valid is True
            assert result.metrics["has_reasoning"] is True
            assert len(result.warnings) == 0  # Clear decisions shouldn't warn

    def test_decision_ambiguity_detection(self):
        """Test detection of ambiguous decisions."""
        # Each decision contains more than one signal token.
        ambiguous_decisions = [
            "BUY or SELL, not sure",
            "Maybe HOLD, could be BUY",
            "SELL but also considering BUY",
        ]
        for decision in ambiguous_decisions:
            result = validate_decision_quality(decision)
            # Should still extract first signal
            assert result.metrics["signal"] is not None
            # But should warn about ambiguity
            assert len(result.warnings) > 0

    def test_report_content_variety_indicators(self):
        """Test that reports with varied content structure are recognized."""
        varied_report = """
# Comprehensive Analysis
## Overview
Multiple content types present.
## Data Table
| Metric | Q1 | Q2 | Q3 | Q4 |
|--------|----|----|----|----|
| Revenue| 10M| 12M| 15M| 18M|
## Key Points
- Point 1
- Point 2
* Point 3
## Details
""" + "Additional detailed analysis. " * 50
        result = validate_report_completeness(varied_report, min_length=500)
        assert result.is_valid is True
        assert result.metrics["markdown_tables"] > 0
        assert result.metrics["section_headers"] > 0
        assert result.metrics["has_bullet_points"] is True
        # No warnings about lacking structure
        assert not any("structured" in w.lower() for w in result.warnings)
# ============================================================================
# Test State Integrity
# ============================================================================
class TestStateIntegrity:
    """Consistency and integrity checks on the agent state structure."""

    def test_all_required_fields_present(self, sample_agent_state_buy):
        """Every mandatory key must exist in a fully populated state."""
        expected_keys = (
            "company_of_interest",
            "trade_date",
            "market_report",
            "sentiment_report",
            "news_report",
            "fundamentals_report",
            "investment_debate_state",
            "risk_debate_state",
            "final_trade_decision",
        )
        for key in expected_keys:
            assert key in sample_agent_state_buy, f"Missing required field: {key}"

    def test_debate_state_internal_consistency(self, sample_invest_debate):
        """A valid debate must report a positive round count and history size."""
        verdict = validate_debate_state(sample_invest_debate, debate_type="invest")
        assert verdict.is_valid is True
        # Count should match history length (approximately)
        assert verdict.metrics["count"] > 0
        assert verdict.metrics["history_length"] > 0

    def test_final_decision_aligns_with_debates(self, sample_agent_state_buy):
        """The final signal must agree with both debates' judge decisions."""
        invest_verdict = validate_debate_state(
            sample_agent_state_buy["investment_debate_state"], debate_type="invest"
        )
        risk_verdict = validate_debate_state(
            sample_agent_state_buy["risk_debate_state"], debate_type="risk"
        )
        decision_verdict = validate_decision_quality(
            sample_agent_state_buy["final_trade_decision"]
        )
        # All should be BUY for this scenario
        assert invest_verdict.metrics.get("judge_signal") == "BUY"
        assert risk_verdict.metrics.get("judge_signal") in ["BUY", "HOLD"]
        assert decision_verdict.metrics["signal"] == "BUY"

    def test_state_preserves_company_context(self, sample_agent_state_buy):
        """Ticker symbol and trade date must be non-empty strings."""
        for key in ("company_of_interest", "trade_date"):
            value = sample_agent_state_buy[key]
            assert isinstance(value, str)
            assert len(value) > 0

    def test_debate_history_chronological_consistency(self, sample_invest_debate):
        """History must exist and be sized plausibly for the round count."""
        transcript = sample_invest_debate["history"]
        rounds = sample_invest_debate["count"]
        # History should exist if count > 0
        if rounds > 0:
            assert len(transcript) > 0
        # If multiple rounds, history should reflect that
        if rounds >= 2:
            # Should have multiple segments or rounds
            assert len(transcript) > 50  # Reasonable minimum for 2+ rounds

    def test_type_consistency_across_state(self, sample_agent_state_buy):
        """String-valued fields must be str; debate states must be dicts."""
        text_keys = (
            "company_of_interest",
            "trade_date",
            "market_report",
            "sentiment_report",
            "news_report",
            "fundamentals_report",
            "final_trade_decision",
        )
        for key in text_keys:
            if key in sample_agent_state_buy:
                assert isinstance(sample_agent_state_buy[key], str), f"{key} should be string"
        for key in ("investment_debate_state", "risk_debate_state"):
            if key in sample_agent_state_buy:
                assert isinstance(sample_agent_state_buy[key], dict), f"{key} should be dict"

    def test_empty_state_detection(self):
        """A completely empty state dict must fail validation outright."""
        verdict = validate_agent_state({})
        assert verdict.is_valid is False
        assert len(verdict.errors) >= 2  # At least missing company and date

View File

@@ -0,0 +1,700 @@
"""
Test suite for Output Validation Utilities.
This module tests:
1. ValidationResult dataclass behavior
2. Report completeness validation (length, markdown, sections)
3. Decision quality validation (signal extraction, reasoning)
4. Debate state validation (history, count, judge_decision)
5. Complete agent state validation (orchestration)
All tests use mocked data (no real API calls).
"""
import pytest
from typing import Dict, Any
from tradingagents.utils.output_validator import (
ValidationResult,
validate_report_completeness,
validate_decision_quality,
validate_debate_state,
validate_agent_state,
)
pytestmark = pytest.mark.unit
# ============================================================================
# Test ValidationResult Dataclass
# ============================================================================
class TestValidationResult:
    """Behavioral tests for the ValidationResult dataclass."""

    def test_default_valid_result(self):
        """A freshly built valid result has empty errors/warnings/metrics."""
        res = ValidationResult(is_valid=True)
        assert res.is_valid is True
        assert res.errors == []
        assert res.warnings == []
        assert res.metrics == {}

    def test_add_error_marks_invalid(self):
        """Recording an error flips the result to invalid and stores the message."""
        res = ValidationResult(is_valid=True)
        res.add_error("Something went wrong")
        assert res.is_valid is False
        assert len(res.errors) == 1
        assert res.errors[0] == "Something went wrong"

    def test_add_warning_keeps_valid(self):
        """Warnings accumulate without affecting validity."""
        res = ValidationResult(is_valid=True)
        res.add_warning("This could be better")
        assert res.is_valid is True
        assert len(res.warnings) == 1
        assert res.warnings[0] == "This could be better"

    def test_add_metric(self):
        """Metrics are stored as key/value pairs."""
        res = ValidationResult(is_valid=True)
        res.add_metric("length", 500)
        res.add_metric("signal", "BUY")
        assert res.metrics["length"] == 500
        assert res.metrics["signal"] == "BUY"

    def test_multiple_errors_and_warnings(self):
        """Several errors and warnings can be accumulated on one result."""
        res = ValidationResult(is_valid=True)
        for message in ("Error 1", "Error 2"):
            res.add_error(message)
        for message in ("Warning 1", "Warning 2"):
            res.add_warning(message)
        assert res.is_valid is False
        assert len(res.errors) == 2
        assert len(res.warnings) == 2
# ============================================================================
# Test Report Validation
# ============================================================================
class TestReportValidation:
    """Test validate_report_completeness() function."""

    def test_valid_report_passes(self):
        """Test that a valid report passes validation."""
        report = "# Market Analysis\n\n" + "This is a comprehensive report. " * 50
        result = validate_report_completeness(report, min_length=500)
        assert result.is_valid is True
        assert len(result.errors) == 0
        assert result.metrics["length"] > 500

    def test_none_report_fails(self):
        """Test that None report fails validation."""
        result = validate_report_completeness(None)
        assert result.is_valid is False
        assert "None" in result.errors[0]

    def test_empty_report_fails(self):
        """Test that empty report fails validation."""
        result = validate_report_completeness("")
        assert result.is_valid is False
        assert "empty" in result.errors[0].lower()

    def test_short_report_fails(self):
        """Test that report below min_length fails."""
        short_report = "Too short"
        result = validate_report_completeness(short_report, min_length=500)
        assert result.is_valid is False
        assert any("minimum" in err.lower() for err in result.errors)
        assert result.metrics["length"] < 500

    def test_wrong_type_fails(self):
        """Test that non-string report fails validation."""
        result = validate_report_completeness(123)
        assert result.is_valid is False
        assert "string" in result.errors[0].lower()

    def test_markdown_table_detection(self):
        """Test detection of markdown tables."""
        report_with_table = """
# Analysis
| Metric | Value |
|--------|-------|
| Price | $100 |
| Volume | 1M |
""" + "Additional text. " * 50
        result = validate_report_completeness(
            report_with_table,
            min_length=200,
            require_markdown_tables=True
        )
        assert result.is_valid is True
        assert result.metrics["markdown_tables"] > 0

    def test_missing_markdown_table_fails_when_required(self):
        """Test that missing markdown tables fails when required."""
        report = "# Analysis\n\n" + "No tables here. " * 50
        result = validate_report_completeness(
            report,
            min_length=200,
            require_markdown_tables=True
        )
        assert result.is_valid is False
        assert any("table" in err.lower() for err in result.errors)

    def test_section_header_detection(self):
        """Test detection of section headers."""
        # Three levels of markdown headers (#, ##, ###).
        report_with_headers = """
# Main Title
## Subsection
### Details
Content here.
""" + "More content. " * 50
        result = validate_report_completeness(
            report_with_headers,
            min_length=200,
            require_sections=True
        )
        assert result.is_valid is True
        assert result.metrics["section_headers"] >= 3

    def test_missing_sections_fails_when_required(self):
        """Test that missing sections fails when required."""
        report = "Just plain text. " * 50
        result = validate_report_completeness(
            report,
            min_length=200,
            require_sections=True
        )
        assert result.is_valid is False
        assert any("section" in err.lower() for err in result.errors)

    def test_short_report_warning(self):
        """Test warning for relatively short reports."""
        # Report is above min but below 1.5x min
        report = "Short but valid. " * 40  # ~680 chars
        result = validate_report_completeness(report, min_length=500)
        assert result.is_valid is True
        assert len(result.warnings) > 0
        assert any("short" in warn.lower() for warn in result.warnings)

    def test_bullet_point_detection(self):
        """Test detection of bullet points."""
        # Mixes "-" and "*" bullet styles.
        report_with_bullets = """
# Analysis
- Point 1
- Point 2
* Point 3
""" + "Additional content. " * 50
        result = validate_report_completeness(report_with_bullets, min_length=200)
        assert result.metrics["has_bullet_points"] is True

    def test_unstructured_content_warning(self):
        """Test warning for content lacking structure."""
        unstructured_report = "Just a long stream of text without any structure. " * 50
        result = validate_report_completeness(unstructured_report, min_length=500)
        assert result.is_valid is True
        assert any("structured" in warn.lower() for warn in result.warnings)
# ============================================================================
# Test Decision Validation
# ============================================================================
class TestDecisionValidation:
    """Unit tests for validate_decision_quality()."""

    def test_valid_buy_decision(self):
        """A BUY verdict with rationale validates cleanly."""
        res = validate_decision_quality("BUY: Strong fundamentals and positive momentum")
        assert res.is_valid is True
        assert res.metrics["signal"] == "BUY"
        assert res.metrics["has_reasoning"] is True

    def test_valid_sell_decision(self):
        """A SELL verdict with rationale validates cleanly."""
        res = validate_decision_quality("SELL: Overvalued with deteriorating fundamentals")
        assert res.is_valid is True
        assert res.metrics["signal"] == "SELL"

    def test_valid_hold_decision(self):
        """A HOLD verdict with rationale validates cleanly."""
        res = validate_decision_quality("HOLD: Mixed signals, awaiting clarity")
        assert res.is_valid is True
        assert res.metrics["signal"] == "HOLD"

    def test_case_insensitive_signal_extraction(self):
        """Signal extraction must not depend on letter case."""
        variants = (
            "buy the stock",
            "BUY the stock",
            "Buy the stock",
            "We should buy",
        )
        for text in variants:
            res = validate_decision_quality(text)
            assert res.metrics["signal"] == "BUY"

    def test_none_decision_fails(self):
        """None input is rejected outright."""
        res = validate_decision_quality(None)
        assert res.is_valid is False
        assert "None" in res.errors[0]

    def test_empty_decision_fails(self):
        """An empty string is rejected outright."""
        res = validate_decision_quality("")
        assert res.is_valid is False
        assert "empty" in res.errors[0].lower()

    def test_no_signal_fails(self):
        """Text with no BUY/SELL/HOLD keyword fails validation."""
        res = validate_decision_quality("This is a decision without a clear signal")
        assert res.is_valid is False
        assert any("signal" in e.lower() for e in res.errors)
        assert res.metrics["signal"] is None

    def test_wrong_type_fails(self):
        """Non-string input (e.g. a dict) is rejected."""
        res = validate_decision_quality({"decision": "BUY"})
        assert res.is_valid is False
        assert "string" in res.errors[0].lower()

    def test_multiple_signals_warning(self):
        """Conflicting keywords keep the first signal but raise a warning."""
        res = validate_decision_quality("BUY or maybe SELL, hard to decide, could HOLD")
        # First occurrence wins...
        assert res.metrics["signal"] == "BUY"
        # ...but the ambiguity is surfaced.
        assert len(res.warnings) > 0
        assert any("conflicting" in w.lower() for w in res.warnings)

    def test_short_decision_warning(self):
        """A bare one-word verdict is valid but flagged as terse."""
        res = validate_decision_quality("BUY")
        assert res.is_valid is True
        assert len(res.warnings) > 0
        assert any("short" in w.lower() for w in res.warnings)

    def test_decision_with_reasoning_markers(self):
        """Colons, periods, and word count all count as reasoning markers."""
        samples = (
            "BUY: Strong fundamentals",
            "SELL. Company is overvalued.",
            "HOLD because market is uncertain",
        )
        for text in samples:
            assert validate_decision_quality(text).metrics["has_reasoning"] is True

    def test_signal_count_metric(self):
        """Every keyword occurrence is tallied in signal_count."""
        res = validate_decision_quality("BUY BUY BUY! Strong signal to buy")
        assert res.metrics["signal_count"] == 4
        assert res.metrics["signal"] == "BUY"
# ============================================================================
# Test Debate State Validation
# ============================================================================
class TestDebateStateValidation:
    """Unit tests for validate_debate_state()."""

    def test_valid_invest_debate_state(self):
        """A fully populated invest debate passes with metrics filled in."""
        state = {
            "history": "Round 1: Bull argues...\nRound 2: Bear counters...",
            "count": 2,
            "judge_decision": "BUY: Bulls made stronger case",
            "bull_history": "Bull argument",
            "bear_history": "Bear argument",
        }
        res = validate_debate_state(state, debate_type="invest")
        assert res.is_valid is True
        assert res.metrics["history_length"] > 0
        assert res.metrics["count"] == 2
        assert res.metrics["judge_signal"] == "BUY"

    def test_valid_risk_debate_state(self):
        """A fully populated risk debate passes."""
        state = {
            "history": "Round 1: Risky argues...\nRound 2: Safe counters...",
            "count": 2,
            "judge_decision": "HOLD: Balanced risk profile",
            "risky_history": "Risky argument",
            "safe_history": "Safe argument",
            "neutral_history": "Neutral argument",
        }
        res = validate_debate_state(state, debate_type="risk")
        assert res.is_valid is True
        assert res.metrics["count"] == 2

    def test_none_debate_state_fails(self):
        """None is rejected outright."""
        res = validate_debate_state(None)
        assert res.is_valid is False
        assert "None" in res.errors[0]

    def test_wrong_type_fails(self):
        """Non-dict input is rejected."""
        res = validate_debate_state("not a dict")
        assert res.is_valid is False
        assert "dict" in res.errors[0].lower()

    def test_missing_required_fields_fails(self):
        """Absent count/judge_decision keys fail validation."""
        partial = {
            "history": "Some history",
            # count and judge_decision deliberately omitted
        }
        res = validate_debate_state(partial)
        assert res.is_valid is False
        assert any("missing" in e.lower() for e in res.errors)

    def test_invalid_debate_type_fails(self):
        """An unrecognized debate_type is rejected."""
        state = {
            "history": "History",
            "count": 1,
            "judge_decision": "BUY",
        }
        res = validate_debate_state(state, debate_type="unknown")
        assert res.is_valid is False
        assert "unknown" in res.errors[0].lower()

    def test_empty_history_warning(self):
        """An empty history string warns but does not fail."""
        state = {
            "history": "",
            "count": 0,
            "judge_decision": "HOLD",
        }
        res = validate_debate_state(state)
        assert res.is_valid is True
        assert any("empty" in w.lower() for w in res.warnings)

    def test_negative_count_fails(self):
        """A negative round count is an error."""
        state = {
            "history": "History",
            "count": -1,
            "judge_decision": "BUY",
        }
        res = validate_debate_state(state)
        assert res.is_valid is False
        assert any("negative" in e.lower() for e in res.errors)

    def test_high_count_warning(self):
        """An unusually large round count triggers a warning."""
        state = {
            "history": "Long debate...",
            "count": 15,
            "judge_decision": "SELL",
        }
        res = validate_debate_state(state)
        assert res.is_valid is True
        assert any("high" in w.lower() for w in res.warnings)

    def test_invalid_judge_decision_warning(self):
        """A judge decision without a signal warns but does not fail."""
        state = {
            "history": "History",
            "count": 2,
            "judge_decision": "No clear signal here",
        }
        res = validate_debate_state(state)
        assert res.is_valid is True
        assert len(res.warnings) > 0

    def test_optional_fields_metric(self):
        """Optional side-history fields are tallied in metrics."""
        state = {
            "history": "History",
            "count": 1,
            "judge_decision": "BUY",
            "bull_history": "Bull",
            "bear_history": "Bear",
        }
        res = validate_debate_state(state, debate_type="invest")
        assert res.metrics["optional_fields_present"] >= 2

    def test_wrong_history_type_fails(self):
        """A non-string history value is an error."""
        state = {
            "history": 123,
            "count": 1,
            "judge_decision": "BUY",
        }
        res = validate_debate_state(state)
        assert res.is_valid is False
        assert any("string" in e.lower() for e in res.errors)

    def test_wrong_count_type_fails(self):
        """A non-integer count value is an error."""
        state = {
            "history": "History",
            "count": "two",
            "judge_decision": "BUY",
        }
        res = validate_debate_state(state)
        assert res.is_valid is False
        assert any("int" in e.lower() for e in res.errors)
# ============================================================================
# Test Agent State Validation
# ============================================================================
class TestAgentStateValidation:
    """Unit tests for validate_agent_state()."""

    def test_valid_complete_agent_state(self):
        """A fully populated state validates with all metrics recorded."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "market_report": "# Market Analysis\n\n" + "Detailed analysis. " * 100,
            "sentiment_report": "# Sentiment Report\n\n" + "Social sentiment. " * 100,
            "news_report": "# News Report\n\n" + "Latest news. " * 100,
            "fundamentals_report": "# Fundamentals\n\n" + "Financial data. " * 100,
            "investment_debate_state": {
                "history": "Debate history",
                "count": 3,
                "judge_decision": "BUY: Strong case",
            },
            "risk_debate_state": {
                "history": "Risk debate",
                "count": 2,
                "judge_decision": "HOLD: Moderate risk",
            },
            "final_trade_decision": "BUY: All signals align positively",
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert res.metrics["company_of_interest"] == "AAPL"
        assert res.metrics["trade_date"] == "2024-01-15"
        assert res.metrics["reports_present"] == 4
        assert res.metrics["final_signal"] == "BUY"

    def test_none_state_fails(self):
        """None is rejected outright."""
        res = validate_agent_state(None)
        assert res.is_valid is False
        assert "None" in res.errors[0]

    def test_wrong_type_fails(self):
        """Non-dict input is rejected."""
        res = validate_agent_state("not a dict")
        assert res.is_valid is False
        assert "dict" in res.errors[0].lower()

    def test_missing_company_fails(self):
        """Absent company_of_interest is an error."""
        res = validate_agent_state({"trade_date": "2024-01-15"})
        assert res.is_valid is False
        assert any("company" in e.lower() for e in res.errors)

    def test_missing_trade_date_fails(self):
        """Absent trade_date is an error."""
        res = validate_agent_state({"company_of_interest": "AAPL"})
        assert res.is_valid is False
        assert any("trade_date" in e.lower() for e in res.errors)

    def test_incomplete_reports_warning(self):
        """Missing reports produce warnings, not errors."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "market_report": "Market analysis. " * 100,
            # remaining three reports deliberately absent
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert len(res.warnings) > 0
        assert res.metrics["reports_present"] < 4

    def test_invalid_report_warning(self):
        """A below-minimum-length report is surfaced as a warning."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "market_report": "Too short",
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("market_report" in w.lower() for w in res.warnings)

    def test_invalid_invest_debate_warning(self):
        """A malformed investment debate is surfaced as a warning."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "investment_debate_state": {
                # count and judge_decision deliberately absent
                "history": "History",
            },
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("investment debate" in w.lower() for w in res.warnings)

    def test_invalid_risk_debate_warning(self):
        """A malformed risk debate is surfaced as a warning."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "risk_debate_state": {
                "count": -1,  # invalid on purpose
            },
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("risk debate" in w.lower() for w in res.warnings)

    def test_invalid_final_decision_warning(self):
        """A final decision without a signal is surfaced as a warning."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "final_trade_decision": "No clear signal",
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("final decision" in w.lower() for w in res.warnings)

    def test_incomplete_state_warning(self):
        """A state with no debates or decision warns about incompleteness."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
        }
        res = validate_agent_state(state)
        assert res.is_valid is True
        assert any("incomplete" in w.lower() for w in res.warnings)

    def test_reports_count_metrics(self):
        """Report coverage metrics reflect exactly what is present."""
        state = {
            "company_of_interest": "AAPL",
            "trade_date": "2024-01-15",
            "market_report": "Report. " * 100,
            "sentiment_report": "Report. " * 100,
        }
        res = validate_agent_state(state)
        assert res.metrics["reports_present"] == 2
        assert res.metrics["total_reports_expected"] == 4

View File

@ -0,0 +1,453 @@
"""
Output validation utilities for agent outputs.
This module provides validation functions for:
- Report completeness (length, structure, markdown formatting)
- Decision quality (signal extraction, reasoning clarity)
- Debate state coherence (history tracking, judge decisions)
- Complete agent state validation
All validators return ValidationResult with actionable feedback.
"""
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
import re
@dataclass
class ValidationResult:
    """
    Outcome of a single validation pass.

    Attributes:
        is_valid: Overall pass/fail flag; flipped to False by add_error().
        errors: Hard failures that made (or will make) the result invalid.
        warnings: Soft quality concerns; these never affect is_valid.
        metrics: Measured values (lengths, counts, extracted signals, ...).
    """

    is_valid: bool
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    metrics: Dict[str, Any] = field(default_factory=dict)

    def add_error(self, message: str) -> None:
        """Record a hard failure; the result becomes invalid."""
        self.is_valid = False
        self.errors.append(message)

    def add_warning(self, message: str) -> None:
        """Record a soft concern without failing the result."""
        self.warnings.append(message)

    def add_metric(self, key: str, value: Any) -> None:
        """Store a measured value under *key*."""
        self.metrics[key] = value
def validate_report_completeness(
    report: Optional[str],
    min_length: int = 500,
    require_markdown_tables: bool = False,
    require_sections: bool = False,
) -> ValidationResult:
    """
    Check that a report is present, long enough, and well-structured.

    Args:
        report: The report text to validate.
        min_length: Minimum character count required (default: 500).
        require_markdown_tables: Fail if no markdown table rows are found.
        require_sections: Fail if no '#'-style section headers are found.

    Returns:
        ValidationResult carrying errors, warnings, and measured metrics
        (length, markdown_tables, section_headers, has_bullet_points).

    Example:
        >>> result = validate_report_completeness("# Report\\n\\nThis is too short")
        >>> assert not result.is_valid
        >>> assert "minimum" in result.errors[0].lower()
    """
    result = ValidationResult(is_valid=True)

    # Type/presence guards — nothing else is measurable without a string.
    if report is None:
        result.add_error("Report is None")
        return result
    if not isinstance(report, str):
        result.add_error(f"Report must be string, got {type(report).__name__}")
        return result

    text_length = len(report.strip())
    result.add_metric("length", text_length)
    if text_length == 0:
        result.add_error("Report is empty")
        return result
    if text_length < min_length:
        result.add_error(
            f"Report length ({text_length}) below minimum ({min_length})"
        )

    # Rows containing at least two pipes are treated as markdown table rows.
    table_rows = re.findall(r'\|.*\|', report)
    result.add_metric("markdown_tables", len(table_rows))
    if require_markdown_tables and not table_rows:
        result.add_error("Report missing required markdown tables")

    # Headers may be indented, so allow optional leading whitespace.
    headers = re.findall(r'^\s*#{1,6}\s+.+$', report, re.MULTILINE)
    result.add_metric("section_headers", len(headers))
    if require_sections and not headers:
        result.add_error("Report missing required section headers")

    # Soft quality signal: anything under 1.5x the minimum is "short".
    if text_length < min_length * 1.5:
        result.add_warning(
            f"Report is relatively short ({text_length} chars). "
            f"Consider adding more detail."
        )

    bullets_found = re.search(r'^\s*[-*]\s+', report, re.MULTILINE) is not None
    result.add_metric("has_bullet_points", bullets_found)
    if not bullets_found and not table_rows:
        result.add_warning("Report lacks structured content (no bullets or tables)")

    return result
def validate_decision_quality(decision: Optional[str]) -> ValidationResult:
    """
    Check a trading decision for a clear signal and supporting reasoning.

    The first BUY/SELL/HOLD keyword (case-insensitive) is extracted as the
    signal; conflicting keywords and terse decisions only raise warnings.

    Args:
        decision: The decision text to validate.

    Returns:
        ValidationResult with metrics: length, signal, signal_count,
        has_reasoning.

    Example:
        >>> result = validate_decision_quality("BUY: Strong fundamentals")
        >>> assert result.is_valid
        >>> assert result.metrics["signal"] == "BUY"
    """
    result = ValidationResult(is_valid=True)

    # Type/presence guards.
    if decision is None:
        result.add_error("Decision is None")
        return result
    if not isinstance(decision, str):
        result.add_error(f"Decision must be string, got {type(decision).__name__}")
        return result

    text = decision.strip()
    if not text:
        result.add_error("Decision is empty")
        return result
    result.add_metric("length", len(text))

    # Word-boundary match so e.g. "rebuy" does not count as a signal.
    hits = re.findall(r'\b(BUY|SELL|HOLD)\b', text, re.IGNORECASE)
    if not hits:
        result.add_error(
            "No clear trading signal found (expected BUY, SELL, or HOLD)"
        )
        result.add_metric("signal", None)
        return result

    primary = hits[0].upper()
    result.add_metric("signal", primary)
    result.add_metric("signal_count", len(hits))

    distinct = {h.upper() for h in hits}
    if len(distinct) > 1:
        result.add_warning(
            f"Multiple conflicting signals found: {distinct}. "
            f"Using first occurrence: {primary}"
        )

    # Any delimiter or a sentence-length decision counts as reasoning.
    reasoned = (':' in text) or ('.' in text) or (len(text.split()) >= 5)
    result.add_metric("has_reasoning", reasoned)
    if not reasoned:
        result.add_warning("Decision lacks clear reasoning or explanation")

    if len(text) < 20:
        result.add_warning(
            f"Decision is very short ({len(text)} chars). "
            f"Consider adding more rationale."
        )

    return result
def validate_debate_state(
    debate_state: Optional[Dict[str, Any]],
    debate_type: str = "invest",
) -> ValidationResult:
    """
    Check a debate-state dict for structure and coherence.

    Requires history, count, and judge_decision keys; a None value for any
    of them is tolerated (e.g. a debate that has not concluded yet), but a
    present value must have the right type and range.

    Args:
        debate_state: The debate state dictionary to validate.
        debate_type: Either "invest" or "risk"; selects the optional fields.

    Returns:
        ValidationResult with metrics: history_length, count, judge_signal,
        optional_fields_present.

    Example:
        >>> state = {"history": "Round 1...", "count": 1, "judge_decision": "BUY"}
        >>> result = validate_debate_state(state)
        >>> assert result.is_valid
    """
    result = ValidationResult(is_valid=True)

    # Type/presence guards.
    if debate_state is None:
        result.add_error("Debate state is None")
        return result
    if not isinstance(debate_state, dict):
        result.add_error(
            f"Debate state must be dict, got {type(debate_state).__name__}"
        )
        return result

    # Both debate types share the same required core; only the optional
    # side-history fields differ.
    optional_by_type = {
        "invest": ["bull_history", "bear_history", "current_response"],
        "risk": [
            "risky_history",
            "safe_history",
            "neutral_history",
            "latest_speaker",
            "current_risky_response",
            "current_safe_response",
            "current_neutral_response",
        ],
    }
    if debate_type not in optional_by_type:
        result.add_error(f"Unknown debate type: {debate_type}")
        return result
    required_fields = ["history", "count", "judge_decision"]
    optional_fields = optional_by_type[debate_type]

    missing_fields = [f for f in required_fields if f not in debate_state]
    if missing_fields:
        result.add_error(f"Missing required fields: {missing_fields}")
        return result

    history = debate_state.get("history")
    if history is not None:
        if not isinstance(history, str):
            result.add_error(
                f"History must be string, got {type(history).__name__}"
            )
        elif not history.strip():
            result.add_warning("History is empty")
        else:
            result.add_metric("history_length", len(history))

    count = debate_state.get("count")
    if count is not None:
        if not isinstance(count, int):
            result.add_error(f"Count must be int, got {type(count).__name__}")
        elif count < 0:
            result.add_error(f"Count cannot be negative: {count}")
        else:
            result.add_metric("count", count)
            # A runaway round count usually means the debate never converged.
            if count > 10:
                result.add_warning(
                    f"Debate count is very high ({count}). "
                    f"May indicate convergence issues."
                )

    judge_decision = debate_state.get("judge_decision")
    if judge_decision is not None:
        if not isinstance(judge_decision, str):
            result.add_error(
                f"Judge decision must be string, got {type(judge_decision).__name__}"
            )
        elif not judge_decision.strip():
            result.add_warning("Judge decision is empty")
        else:
            # A weak judge decision degrades quality but does not invalidate
            # the debate structure itself.
            verdict = validate_decision_quality(judge_decision)
            if verdict.is_valid:
                result.add_metric("judge_signal", verdict.metrics.get("signal"))
            else:
                result.add_warning(
                    f"Judge decision has quality issues: "
                    f"{', '.join(verdict.errors)}"
                )

    result.add_metric(
        "optional_fields_present",
        sum(1 for f in optional_fields if f in debate_state),
    )
    return result
def _validate_identity_fields(state: Dict[str, Any], result: ValidationResult) -> None:
    """Require company_of_interest and trade_date; record them as metrics."""
    company = state.get("company_of_interest")
    if not company:
        result.add_error("Missing company_of_interest")
    else:
        result.add_metric("company_of_interest", company)

    trade_date = state.get("trade_date")
    if not trade_date:
        result.add_error("Missing trade_date")
    else:
        result.add_metric("trade_date", trade_date)


def _validate_reports(state: Dict[str, Any], result: ValidationResult) -> None:
    """Validate each analyst report that is present; track coverage metrics."""
    report_fields = [
        "market_report",
        "sentiment_report",
        "news_report",
        "fundamentals_report",
    ]
    reports_present = 0
    for report_field in report_fields:
        report = state.get(report_field)
        if not report:
            continue
        reports_present += 1
        # Report problems degrade quality but do not invalidate the state.
        report_result = validate_report_completeness(
            report,
            min_length=500,
            require_markdown_tables=False,
            require_sections=False,
        )
        if not report_result.is_valid:
            result.add_warning(
                f"{report_field} has issues: {', '.join(report_result.errors)}"
            )

    result.add_metric("reports_present", reports_present)
    result.add_metric("total_reports_expected", len(report_fields))
    if reports_present < len(report_fields):
        result.add_warning(
            f"Only {reports_present}/{len(report_fields)} reports present"
        )


def _validate_debates(state: Dict[str, Any], result: ValidationResult) -> bool:
    """Validate both debate states; return True if at least one is present."""
    invest_debate = state.get("investment_debate_state")
    if invest_debate:
        invest_result = validate_debate_state(invest_debate, debate_type="invest")
        if not invest_result.is_valid:
            result.add_warning(
                f"Investment debate has issues: {', '.join(invest_result.errors)}"
            )
        result.add_metric("investment_debate_valid", invest_result.is_valid)

    risk_debate = state.get("risk_debate_state")
    if risk_debate:
        risk_result = validate_debate_state(risk_debate, debate_type="risk")
        if not risk_result.is_valid:
            result.add_warning(
                f"Risk debate has issues: {', '.join(risk_result.errors)}"
            )
        result.add_metric("risk_debate_valid", risk_result.is_valid)

    return bool(invest_debate) or bool(risk_debate)


def _validate_final_decision(state: Dict[str, Any], result: ValidationResult) -> None:
    """Validate the final trade decision, recording its extracted signal."""
    final_decision = state.get("final_trade_decision")
    if not final_decision:
        return
    decision_result = validate_decision_quality(final_decision)
    if decision_result.is_valid:
        result.add_metric("final_signal", decision_result.metrics.get("signal"))
    else:
        result.add_warning(
            f"Final decision has issues: {', '.join(decision_result.errors)}"
        )


def validate_agent_state(state: Optional[Dict[str, Any]]) -> ValidationResult:
    """
    Validate a complete agent state by orchestrating all sub-validators.

    Checks, in order: identity fields (company, trade date), the four analyst
    reports, both debate states, and the final trade decision. Only missing
    identity fields (or a non-dict state) make the result invalid; everything
    else is reported as warnings so partially built states can be inspected.

    Args:
        state: The complete agent state dictionary.

    Returns:
        ValidationResult with aggregate metrics (reports_present,
        total_reports_expected, debate validity flags, final_signal, ...).

    Example:
        >>> state = {
        ...     "company_of_interest": "AAPL",
        ...     "trade_date": "2024-01-15",
        ...     "market_report": "Market analysis..." * 100,
        ... }
        >>> result = validate_agent_state(state)
        >>> assert "company_of_interest" in result.metrics
    """
    result = ValidationResult(is_valid=True)

    # Type/presence guards.
    if state is None:
        result.add_error("Agent state is None")
        return result
    if not isinstance(state, dict):
        result.add_error(f"Agent state must be dict, got {type(state).__name__}")
        return result

    _validate_identity_fields(state, result)
    _validate_reports(state, result)
    debates_present = _validate_debates(state, result)
    _validate_final_decision(state, result)

    # Emitted last so warning order matches the per-section checks above.
    if not debates_present:
        result.add_warning(
            "State appears incomplete: no debate states present"
        )
    return result