feat(tests): add UAT and evaluation tests for agent outputs - Fixes #53
- Created tradingagents/utils/output_validator.py with ValidationResult dataclass
- Added validate_report_completeness(), validate_decision_quality() for content validation
- Added validate_debate_state(), validate_agent_state() for state coherence
- Created tests/unit/test_output_validators.py with 54 unit tests
- Created tests/e2e/test_uat_agent_outputs.py with 23 UAT scenarios
- Added agent state fixtures to tests/conftest.py (sample_agent_state, debates)
- Total: 77 tests covering report quality, signal extraction, and state integrity

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
b4653ca37b
commit
e5575250df
|
|
@ -383,3 +383,271 @@ def openrouter_config():
|
|||
"backend_url": "https://openrouter.ai/api/v1",
|
||||
})
|
||||
return config
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Agent Output Validation Fixtures (Issue #53)
|
||||
# ============================================================================
|
||||
|
||||
@pytest.fixture
def sample_agent_state():
    """
    Build a fully populated agent state for a bullish AAPL scenario.

    The state holds four analyst reports (a short markdown document followed
    by repeated filler text), an investment debate, a risk debate, the final
    trade decision, and the investment/trader plans.

    Scope: function (default)

    Returns:
        dict: Agent state with every field populated; all decisions read BUY.

    Example:
        def test_complete_state(sample_agent_state):
            assert sample_agent_state["company_of_interest"] == "AAPL"
            assert "market_report" in sample_agent_state
    """
    # NOTE: the markdown bodies below are flush-left on purpose; any leading
    # indentation would become part of the report text.
    market_body = """
# Market Analysis for AAPL

## Technical Indicators
Strong bullish momentum with RSI at 55 and MACD showing positive divergence.
Price has broken through key resistance at $175.

## Volume Analysis
Above-average volume on recent upward moves indicates strong buyer interest.
Institutional accumulation pattern observed over the past 2 weeks.

## Price Action
Clear higher highs and higher lows pattern establishing uptrend.
Support level established at $170 with strong buying pressure.
"""
    sentiment_body = """
# Social Media Sentiment Analysis

## Overall Sentiment
Strongly positive sentiment across major platforms (Twitter, Reddit, StockTwits).
Sentiment score: 8.5/10 based on 10,000+ analyzed posts.

## Key Themes
- New product launch excitement
- Strong quarterly earnings anticipation
- Innovation leadership recognition

## Influencer Activity
Major tech influencers bullish on near-term prospects.
"""
    news_body = """
# News Analysis

## Recent Headlines
- Major product announcement driving positive coverage
- Analyst upgrades from 3 top firms this week
- Partnership announcements in AI space

## Coverage Tone
85% positive, 10% neutral, 5% negative across 50 major news sources.

## Impact Assessment
News flow strongly supportive of bullish thesis.
"""
    fundamentals_body = """
# Fundamental Analysis

## Financial Metrics
| Metric | Value | Industry Avg |
|--------|-------|--------------|
| P/E | 28 | 25 |
| ROE | 45% | 20% |
| Revenue Growth | 12% | 8% |

## Balance Sheet
Strong cash position of $150B, low debt-to-equity ratio.

## Earnings Quality
Consistent earnings growth with strong margins.
"""

    # Investment (bull vs. bear) debate that settles on BUY.
    invest_debate = dict(
        history="Round 1: Bull presents case for strong buy...\nRound 2: Bear raises concerns about valuation...\nRound 3: Bull counters with growth prospects...",
        count=3,
        judge_decision="BUY: Bulls made a compelling case with strong fundamentals and positive momentum",
        bull_history="Strong fundamentals, positive momentum, innovation leadership",
        bear_history="Slight valuation concerns, market volatility risk",
        current_response="Final recommendation is BUY",
    )

    # Risk (risky / safe / neutral) debate that also settles on BUY.
    risk_debate = dict(
        history="Round 1: Risk assessment begins...\nRound 2: Conservative view presented...",
        count=2,
        judge_decision="BUY: Risk is acceptable given strong fundamentals",
        risky_history="High potential upside justifies position",
        safe_history="Proceed with caution, good fundamentals",
        neutral_history="Balanced risk-reward at current levels",
        latest_speaker="neutral",
        current_risky_response="Strong buy",
        current_safe_response="Moderate buy",
        current_neutral_response="Buy with standard position sizing",
    )

    # Each report is its markdown body followed by 30 repetitions of filler.
    return {
        "company_of_interest": "AAPL",
        "trade_date": "2024-01-15",
        "market_report": market_body + "Additional detailed analysis. " * 30,
        "sentiment_report": sentiment_body + "More sentiment details. " * 30,
        "news_report": news_body + "Additional news analysis. " * 30,
        "fundamentals_report": fundamentals_body + "Detailed fundamental analysis. " * 30,
        "investment_debate_state": invest_debate,
        "risk_debate_state": risk_debate,
        "final_trade_decision": "BUY: Strong consensus across all analysis teams. Fundamentals solid, technicals bullish, sentiment positive. Entry at current levels recommended with standard position sizing.",
        "investment_plan": "Initiate position with 2% portfolio allocation",
        "trader_investment_plan": "Execute market order for calculated position size",
        "sender": "trader",
    }
|
||||
|
||||
|
||||
@pytest.fixture
def sample_agent_state_buy(sample_agent_state):
    """
    Sample agent state with BUY decision.

    Returns an independent deep copy of ``sample_agent_state``, whose
    decisions are all BUY. Copying keeps the two fixtures from aliasing the
    same dict, so a test that requests both and mutates one does not
    silently change the other.

    Scope: function (default)

    Args:
        sample_agent_state: The base fixture providing the BUY-scenario state.

    Returns:
        dict: Agent state with BUY decision

    Example:
        def test_buy_scenario(sample_agent_state_buy):
            assert "BUY" in sample_agent_state_buy["final_trade_decision"]
    """
    # Local import: only this fixture needs it, and the file's import block
    # is outside this definition.
    import copy

    return copy.deepcopy(sample_agent_state)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_agent_state_sell():
    """
    Build an agent state for a bearish TSLA scenario.

    All four reports, both debate verdicts, and the final trade decision
    carry a SELL message.

    Scope: function (default)

    Returns:
        dict: Agent state with SELL decision

    Example:
        def test_sell_scenario(sample_agent_state_sell):
            assert "SELL" in sample_agent_state_sell["final_trade_decision"]
    """
    invest_debate = dict(
        history="Round 1: Bear presents strong sell case...\nRound 2: Bull unable to counter effectively...",
        count=2,
        judge_decision="SELL: Bears made compelling case with fundamental concerns",
        bull_history="Limited upside potential",
        bear_history="Strong downside risk, overvalued",
    )
    risk_debate = dict(
        history="Round 1: Risk analysis shows high downside...",
        count=1,
        judge_decision="SELL: Exit position to preserve capital",
        risky_history="Too risky, exit recommended",
        safe_history="Definitely sell",
        neutral_history="Sell is prudent",
    )

    # Keys are inserted in the same order the dict has always exposed them.
    state = {}
    state["company_of_interest"] = "TSLA"
    state["trade_date"] = "2024-01-20"
    # Each report: a heading and one summary sentence, then 50x filler.
    state["market_report"] = "# Market Analysis\n\nBearish technical pattern with breakdown below support. " + "Detailed analysis. " * 50
    state["sentiment_report"] = "# Sentiment Analysis\n\nNegative sentiment prevailing across platforms. " + "More details. " * 50
    state["news_report"] = "# News Report\n\nMultiple negative headlines and analyst downgrades. " + "Additional coverage. " * 50
    state["fundamentals_report"] = "# Fundamentals\n\nDeteriorating metrics and earnings concerns. " + "Financial details. " * 50
    state["investment_debate_state"] = invest_debate
    state["risk_debate_state"] = risk_debate
    state["final_trade_decision"] = "SELL: Consensus to exit position. Fundamentals weak, technicals bearish, sentiment negative."
    return state
|
||||
|
||||
|
||||
@pytest.fixture
def sample_agent_state_hold():
    """
    Build an agent state for a mixed-signal GOOGL scenario.

    The reports describe neutral or balanced conditions, and both debate
    verdicts and the final trade decision read HOLD.

    Scope: function (default)

    Returns:
        dict: Agent state with HOLD decision

    Example:
        def test_hold_scenario(sample_agent_state_hold):
            assert "HOLD" in sample_agent_state_hold["final_trade_decision"]
    """
    invest_debate = dict(
        history="Round 1: Bull and Bear present balanced views...\nRound 2: No clear winner...\nRound 3: Continued debate...",
        count=3,
        judge_decision="HOLD: Insufficient conviction either way, maintain position",
        bull_history="Some positives but not strong",
        bear_history="Some concerns but not severe",
    )
    risk_debate = dict(
        history="Round 1: Risk assessment shows balanced profile...",
        count=1,
        judge_decision="HOLD: Risk-reward balanced, no action needed",
        risky_history="Could go either way",
        safe_history="Wait for clarity",
        neutral_history="Hold is appropriate",
    )

    # Keys are inserted in the same order the dict has always exposed them.
    state = {}
    state["company_of_interest"] = "GOOGL"
    state["trade_date"] = "2024-01-22"
    # Each report: a heading and one summary sentence, then 50x filler.
    state["market_report"] = "# Market Analysis\n\nMixed signals with consolidation pattern. " + "Technical details. " * 50
    state["sentiment_report"] = "# Sentiment Analysis\n\nNeutral sentiment, market awaiting catalyst. " + "Sentiment data. " * 50
    state["news_report"] = "# News Report\n\nBalanced news flow, no major catalysts. " + "News details. " * 50
    state["fundamentals_report"] = "# Fundamentals\n\nSolid but not compelling, fairly valued. " + "Financial data. " * 50
    state["investment_debate_state"] = invest_debate
    state["risk_debate_state"] = risk_debate
    state["final_trade_decision"] = "HOLD: Mixed signals across analysis teams. Await further clarity before making move."
    return state
|
||||
|
||||
|
||||
@pytest.fixture
def sample_invest_debate():
    """
    Build a standalone investment debate state.

    Lets tests exercise debate validation without constructing a whole
    agent state. The debate has three rounds and a BUY judge decision.

    Scope: function (default)

    Returns:
        dict: Investment debate state (InvestDebateState)

    Example:
        def test_debate(sample_invest_debate):
            assert sample_invest_debate["count"] > 0
    """
    rounds = (
        "Round 1: Bull argues for strong buy based on fundamentals...",
        "Round 2: Bear raises valuation concerns...",
        "Round 3: Bull counters with growth prospects...",
    )
    return dict(
        history="\n".join(rounds),
        count=3,
        judge_decision="BUY: Bulls presented stronger evidence",
        bull_history="Strong fundamentals, positive technicals, good sentiment",
        bear_history="Valuation slightly stretched, some market risk",
        current_response="Recommend BUY with conviction",
    )
|
||||
|
||||
|
||||
@pytest.fixture
def sample_risk_debate():
    """
    Build a standalone risk debate state.

    Lets tests exercise risk-debate validation without constructing a whole
    agent state. The debate has two rounds, per-analyst histories and
    current responses, and a BUY judge decision.

    Scope: function (default)

    Returns:
        dict: Risk debate state (RiskDebateState)

    Example:
        def test_risk_debate(sample_risk_debate):
            assert sample_risk_debate["count"] > 0
    """
    rounds = (
        "Round 1: Risk analysts evaluate position sizing...",
        "Round 2: Discussion on risk parameters...",
    )
    return dict(
        history="\n".join(rounds),
        count=2,
        judge_decision="BUY: Risk acceptable with standard position size",
        risky_history="Aggressive position justified by strong signals",
        safe_history="Conservative position appropriate given uncertainty",
        neutral_history="Standard position sizing recommended",
        latest_speaker="neutral",
        current_risky_response="Take larger position",
        current_safe_response="Take smaller position",
        current_neutral_response="Standard position is balanced",
    )
|
||||
|
|
|
|||
|
|
@ -0,0 +1,495 @@
|
|||
"""
|
||||
UAT (User Acceptance Testing) for Agent Output Quality.
|
||||
|
||||
This module provides end-to-end tests for complete agent workflows:
|
||||
1. Complete analysis workflow (BUY/SELL/HOLD scenarios)
|
||||
2. Edge case handling (missing data, conflicting reports)
|
||||
3. Content quality validation (length, structure, clarity)
|
||||
4. State integrity checks (field presence, debate coherence)
|
||||
|
||||
All tests use mocked data to avoid real API calls.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from typing import Dict, Any
|
||||
|
||||
from tradingagents.utils.output_validator import (
|
||||
validate_agent_state,
|
||||
validate_decision_quality,
|
||||
validate_debate_state,
|
||||
validate_report_completeness,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.e2e
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Complete Analysis Workflow
|
||||
# ============================================================================
|
||||
|
||||
class TestCompleteAnalysisWorkflow:
    """Test complete agent analysis workflow for different trading scenarios."""

    def test_buy_scenario_complete_workflow(self, sample_agent_state_buy):
        """
        Test complete BUY scenario workflow.

        Asserts that the validated state:
        - is valid and belongs to AAPL
        - counts all four reports
        - resolves to a BUY final signal
        - reports both the investment and the risk debate as valid
        """
        state = sample_agent_state_buy

        # Validate complete state
        result = validate_agent_state(state)

        assert result.is_valid is True
        assert result.metrics["company_of_interest"] == "AAPL"
        assert result.metrics["reports_present"] == 4
        assert result.metrics["final_signal"] == "BUY"
        assert result.metrics["investment_debate_valid"] is True
        assert result.metrics["risk_debate_valid"] is True

    def test_sell_scenario_complete_workflow(self, sample_agent_state_sell):
        """
        Test complete SELL scenario workflow.

        Asserts that the validated state is valid, resolves to a SELL final
        signal, and counts all four reports.
        """
        state = sample_agent_state_sell

        result = validate_agent_state(state)

        assert result.is_valid is True
        assert result.metrics["final_signal"] == "SELL"
        assert result.metrics["reports_present"] == 4

    def test_hold_scenario_complete_workflow(self, sample_agent_state_hold):
        """
        Test complete HOLD scenario workflow.

        Asserts that the validated state is valid and resolves to a HOLD
        final signal.
        """
        state = sample_agent_state_hold

        result = validate_agent_state(state)

        assert result.is_valid is True
        assert result.metrics["final_signal"] == "HOLD"

    def test_workflow_preserves_debate_history(self, sample_agent_state_buy):
        """Test that both debates in the state report history and a round count."""
        state = sample_agent_state_buy

        invest_debate = state["investment_debate_state"]
        risk_debate = state["risk_debate_state"]

        # Validate both debates have history
        invest_result = validate_debate_state(invest_debate, debate_type="invest")
        risk_result = validate_debate_state(risk_debate, debate_type="risk")

        assert invest_result.metrics["history_length"] > 0
        assert risk_result.metrics["history_length"] > 0
        assert invest_result.metrics["count"] > 0
        assert risk_result.metrics["count"] > 0

    def test_workflow_all_reports_meet_quality_standards(self, sample_agent_state_buy):
        """Test that all four reports pass the 500-character completeness check."""
        state = sample_agent_state_buy

        report_fields = (
            "market_report",
            "sentiment_report",
            "news_report",
            "fundamentals_report",
        )

        # Iterate by field name so a failure names the offending report
        # instead of pointing at an anonymous loop iteration.
        for field in report_fields:
            result = validate_report_completeness(
                state[field],
                min_length=500,
                require_markdown_tables=False,
                require_sections=False,
            )
            assert result.is_valid is True, (
                f"{field} failed completeness validation: {result.errors}"
            )
            assert result.metrics["length"] >= 500, (
                f"{field} is shorter than 500 characters"
            )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Edge Case Scenarios
|
||||
# ============================================================================
|
||||
|
||||
class TestEdgeCaseScenarios:
    """Test handling of edge cases and unusual scenarios."""

    def test_missing_single_report_graceful_degradation(self):
        """Test that a state without fundamentals_report is valid but warned about."""
        state = {
            "company_of_interest": "TSLA",
            "trade_date": "2024-01-20",
            "market_report": "Market analysis. " * 100,
            "sentiment_report": "Sentiment analysis. " * 100,
            "news_report": "News analysis. " * 100,
            # Missing fundamentals_report
            "investment_debate_state": {
                "history": "Debate based on available data",
                "count": 3,
                "judge_decision": "HOLD: Incomplete data, proceeding cautiously",
            },
            "risk_debate_state": {
                "history": "Risk assessment",
                "count": 2,
                "judge_decision": "HOLD: Missing fundamentals increases uncertainty",
            },
            "final_trade_decision": "HOLD: Awaiting fundamental data",
        }

        result = validate_agent_state(state)

        # Should still be valid but with warnings
        assert result.is_valid is True
        assert result.metrics["reports_present"] == 3
        assert len(result.warnings) > 0

    def test_conflicting_debate_conclusions_warning(self):
        """
        Test a state whose investment and risk debates disagree.

        The investment judge says BUY, the risk judge says SELL, and the
        final decision is HOLD. Only validity and the HOLD final signal are
        asserted.

        NOTE(review): despite the test name, no warning about the conflict
        is asserted here - confirm whether the validator emits one and, if
        so, assert on it.
        """
        state = {
            "company_of_interest": "GOOGL",
            "trade_date": "2024-01-22",
            "market_report": "Report. " * 100,
            "sentiment_report": "Report. " * 100,
            "news_report": "Report. " * 100,
            "fundamentals_report": "Report. " * 100,
            "investment_debate_state": {
                "history": "Bullish debate",
                "count": 2,
                "judge_decision": "BUY: Strong upside potential",
            },
            "risk_debate_state": {
                "history": "Risk concerns",
                "count": 2,
                "judge_decision": "SELL: Risk too high",  # Conflicts with invest
            },
            "final_trade_decision": "HOLD: Conflicting signals from teams",
        }

        result = validate_agent_state(state)

        assert result.is_valid is True
        # The final decision, not either debate verdict, determines the signal
        assert result.metrics.get("final_signal") == "HOLD"

    def test_empty_debate_history_but_valid_decision(self):
        """Test handling of empty debate history with valid decision."""
        state = {
            "company_of_interest": "MSFT",
            "trade_date": "2024-01-25",
            "market_report": "Report. " * 100,
            "investment_debate_state": {
                "history": "",  # Empty history
                "count": 0,
                "judge_decision": "HOLD: Insufficient deliberation",
            },
            "final_trade_decision": "HOLD: More analysis needed",
        }

        result = validate_agent_state(state)

        assert result.is_valid is True
        assert len(result.warnings) > 0  # Should warn about empty history

    def test_very_long_debate_convergence_issue(self):
        """Test that a 15-round debate keeps the state valid but warns."""
        state = {
            "company_of_interest": "NVDA",
            "trade_date": "2024-01-28",
            "market_report": "Report. " * 100,
            "investment_debate_state": {
                "history": "Round 1...\nRound 2...\n" * 15,
                "count": 15,  # Very high count
                "judge_decision": "BUY: Finally reached consensus",
            },
            "final_trade_decision": "BUY: After extensive deliberation",
        }

        result = validate_agent_state(state)

        assert result.is_valid is True
        # Should have warnings about high debate count
        invest_debate_result = validate_debate_state(
            state["investment_debate_state"],
            debate_type="invest",
        )
        assert len(invest_debate_result.warnings) > 0

    def test_malformed_but_extractable_decision(self):
        """Test extraction of a BUY signal from poorly formatted decisions."""
        decisions = [
            "i think we should BUY this stock",
            "recommendation: buy",
            "buy!!!",
            "Final call is to buy the position",
        ]

        for decision in decisions:
            result = validate_decision_quality(decision)
            # Name the input on failure; otherwise the loop hides which
            # phrasing broke signal extraction.
            assert result.metrics["signal"] == "BUY", (
                f"expected BUY signal from decision {decision!r}"
            )

    def test_missing_all_debate_states(self):
        """Test handling when no debates occurred."""
        state = {
            "company_of_interest": "META",
            "trade_date": "2024-02-01",
            "market_report": "Report. " * 100,
            # No debate states
            "final_trade_decision": "HOLD: No consensus reached",
        }

        result = validate_agent_state(state)

        assert result.is_valid is True
        assert len(result.warnings) > 0
        assert any("incomplete" in w.lower() for w in result.warnings)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Content Quality
|
||||
# ============================================================================
|
||||
|
||||
class TestContentQuality:
    """Test content quality validation across all outputs."""

    def test_report_minimum_length_enforcement(self):
        """Test that reports far below min_length=500 are rejected."""
        short_reports = [
            "Too short",
            "Also short",
            "Brief",
        ]

        for report in short_reports:
            result = validate_report_completeness(report, min_length=500)
            # Name the input on failure so the offending report is obvious.
            assert result.is_valid is False, (
                f"short report {report!r} unexpectedly passed validation"
            )

    def test_report_markdown_structure_quality(self):
        """Test that a report with headers, a table and bullets passes strict checks."""
        # Flush-left on purpose: leading indentation would become part of
        # the report text.
        well_structured_report = """
# Market Analysis for AAPL

## Executive Summary
Strong buy signal based on comprehensive analysis.

## Technical Indicators
| Indicator | Value | Signal |
|-----------|-------|--------|
| RSI | 45 | Neutral|
| MACD | +2.3 | Buy |

## Fundamental Analysis
- Revenue growth: 15% YoY
- P/E ratio: 25 (reasonable for tech)
- Strong balance sheet

## Conclusion
""" + "Detailed conclusion. " * 50

        result = validate_report_completeness(
            well_structured_report,
            min_length=500,
            require_markdown_tables=True,
            require_sections=True,
        )

        assert result.is_valid is True
        assert result.metrics["markdown_tables"] > 0
        assert result.metrics["section_headers"] >= 3
        assert result.metrics["has_bullet_points"] is True

    def test_decision_clarity_with_reasoning(self):
        """Test that clear decisions with reasoning validate without warnings."""
        clear_decisions = [
            "BUY: Strong fundamentals (P/E 20), positive momentum (RSI 55), bullish sentiment",
            "SELL: Overvalued at current P/E of 45, declining revenue, negative news",
            "HOLD: Mixed signals - good fundamentals but uncertain market conditions",
        ]

        for decision in clear_decisions:
            result = validate_decision_quality(decision)
            assert result.is_valid is True, (
                f"clear decision rejected: {decision!r} ({result.errors})"
            )
            assert result.metrics["has_reasoning"] is True, (
                f"no reasoning detected in {decision!r}"
            )
            # Clear decisions shouldn't warn
            assert len(result.warnings) == 0, (
                f"unexpected warnings for {decision!r}: {result.warnings}"
            )

    def test_decision_ambiguity_detection(self):
        """Test that decisions naming several signals yield a signal and a warning."""
        ambiguous_decisions = [
            "BUY or SELL, not sure",
            "Maybe HOLD, could be BUY",
            "SELL but also considering BUY",
        ]

        for decision in ambiguous_decisions:
            result = validate_decision_quality(decision)
            # Should still extract first signal
            assert result.metrics["signal"] is not None, (
                f"no signal extracted from {decision!r}"
            )
            # But should warn about ambiguity
            assert len(result.warnings) > 0, (
                f"no ambiguity warning for {decision!r}"
            )

    def test_report_content_variety_indicators(self):
        """Test that reports with varied content structure are recognized."""
        # Flush-left on purpose: leading indentation would become part of
        # the report text.
        varied_report = """
# Comprehensive Analysis

## Overview
Multiple content types present.

## Data Table
| Metric | Q1 | Q2 | Q3 | Q4 |
|--------|----|----|----|----|
| Revenue| 10M| 12M| 15M| 18M|

## Key Points
- Point 1
- Point 2
* Point 3

## Details
""" + "Additional detailed analysis. " * 50

        result = validate_report_completeness(varied_report, min_length=500)

        assert result.is_valid is True
        assert result.metrics["markdown_tables"] > 0
        assert result.metrics["section_headers"] > 0
        assert result.metrics["has_bullet_points"] is True
        # No warnings about lacking structure
        assert not any("structured" in w.lower() for w in result.warnings)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test State Integrity
|
||||
# ============================================================================
|
||||
|
||||
class TestStateIntegrity:
    """Checks that an agent state is complete, well-typed and self-consistent."""

    def test_all_required_fields_present(self, sample_agent_state_buy):
        """Every required top-level key must exist in the BUY state."""
        expected_keys = (
            "company_of_interest",
            "trade_date",
            "market_report",
            "sentiment_report",
            "news_report",
            "fundamentals_report",
            "investment_debate_state",
            "risk_debate_state",
            "final_trade_decision",
        )

        for field in expected_keys:
            assert field in sample_agent_state_buy, f"Missing required field: {field}"

    def test_debate_state_internal_consistency(self, sample_invest_debate):
        """A standalone investment debate validates with a round count and history."""
        outcome = validate_debate_state(sample_invest_debate, debate_type="invest")

        assert outcome.is_valid is True
        # Only positivity is checked; count is not compared against the
        # history length.
        assert outcome.metrics["count"] > 0
        assert outcome.metrics["history_length"] > 0

    def test_final_decision_aligns_with_debates(self, sample_agent_state_buy):
        """Debate verdicts and the final decision must agree for the BUY state."""
        bundle = sample_agent_state_buy

        # Extract all signals
        invest_outcome = validate_debate_state(
            bundle["investment_debate_state"], debate_type="invest"
        )
        risk_outcome = validate_debate_state(
            bundle["risk_debate_state"], debate_type="risk"
        )
        final_outcome = validate_decision_quality(bundle["final_trade_decision"])

        # Investment and final must be BUY; the risk judge may be BUY or HOLD
        assert invest_outcome.metrics.get("judge_signal") == "BUY"
        assert risk_outcome.metrics.get("judge_signal") in ["BUY", "HOLD"]
        assert final_outcome.metrics["signal"] == "BUY"

    def test_state_preserves_company_context(self, sample_agent_state_buy):
        """Company and trade date must be non-empty strings."""
        ticker = sample_agent_state_buy["company_of_interest"]
        as_of = sample_agent_state_buy["trade_date"]

        # Verify basic context
        assert isinstance(ticker, str)
        assert len(ticker) > 0
        assert isinstance(as_of, str)
        assert len(as_of) > 0

    def test_debate_history_chronological_consistency(self, sample_invest_debate):
        """History text must exist, and be longer, as the round count grows."""
        transcript = sample_invest_debate["history"]
        rounds = sample_invest_debate["count"]

        # History should exist if count > 0
        if rounds > 0:
            assert len(transcript) > 0

        # If multiple rounds, history should reflect that
        if rounds >= 2:
            # Reasonable minimum for 2+ rounds
            assert len(transcript) > 50

    def test_type_consistency_across_state(self, sample_agent_state_buy):
        """Fields that are present must be str or dict as appropriate."""
        bundle = sample_agent_state_buy

        # String fields
        text_keys = (
            "company_of_interest",
            "trade_date",
            "market_report",
            "sentiment_report",
            "news_report",
            "fundamentals_report",
            "final_trade_decision",
        )
        for field in text_keys:
            if field not in bundle:
                continue
            assert isinstance(bundle[field], str), f"{field} should be string"

        # Dict fields
        mapping_keys = ("investment_debate_state", "risk_debate_state")
        for field in mapping_keys:
            if field not in bundle:
                continue
            assert isinstance(bundle[field], dict), f"{field} should be dict"

    def test_empty_state_detection(self):
        """An empty state is invalid and yields at least two errors."""
        outcome = validate_agent_state({})

        assert outcome.is_valid is False
        # At least missing company and date
        assert len(outcome.errors) >= 2
|
||||
|
|
@ -0,0 +1,700 @@
|
|||
"""
|
||||
Test suite for Output Validation Utilities.
|
||||
|
||||
This module tests:
|
||||
1. ValidationResult dataclass behavior
|
||||
2. Report completeness validation (length, markdown, sections)
|
||||
3. Decision quality validation (signal extraction, reasoning)
|
||||
4. Debate state validation (history, count, judge_decision)
|
||||
5. Complete agent state validation (orchestration)
|
||||
|
||||
All tests use mocked data (no real API calls).
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from typing import Dict, Any
|
||||
|
||||
from tradingagents.utils.output_validator import (
|
||||
ValidationResult,
|
||||
validate_report_completeness,
|
||||
validate_decision_quality,
|
||||
validate_debate_state,
|
||||
validate_agent_state,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test ValidationResult Dataclass
|
||||
# ============================================================================
|
||||
|
||||
class TestValidationResult:
    """Exercises the ValidationResult container and its add_* helpers."""

    def test_default_valid_result(self):
        """A result built with only is_valid=True has empty collections."""
        fresh = ValidationResult(is_valid=True)

        assert fresh.is_valid is True
        assert fresh.errors == []
        assert fresh.warnings == []
        assert fresh.metrics == {}

    def test_add_error_marks_invalid(self):
        """Recording an error stores it and flips is_valid to False."""
        outcome = ValidationResult(is_valid=True)

        outcome.add_error("Something went wrong")

        assert outcome.is_valid is False
        assert len(outcome.errors) == 1
        assert outcome.errors[0] == "Something went wrong"

    def test_add_warning_keeps_valid(self):
        """Recording a warning stores it without touching is_valid."""
        outcome = ValidationResult(is_valid=True)

        outcome.add_warning("This could be better")

        assert outcome.is_valid is True
        assert len(outcome.warnings) == 1
        assert outcome.warnings[0] == "This could be better"

    def test_add_metric(self):
        """Metrics are retrievable by the key they were added under."""
        outcome = ValidationResult(is_valid=True)

        outcome.add_metric("length", 500)
        outcome.add_metric("signal", "BUY")

        assert outcome.metrics["length"] == 500
        assert outcome.metrics["signal"] == "BUY"

    def test_multiple_errors_and_warnings(self):
        """Errors and warnings accumulate; any error makes the result invalid."""
        outcome = ValidationResult(is_valid=True)

        # Both errors are added before either warning.
        for message in ("Error 1", "Error 2"):
            outcome.add_error(message)
        for message in ("Warning 1", "Warning 2"):
            outcome.add_warning(message)

        assert outcome.is_valid is False
        assert len(outcome.errors) == 2
        assert len(outcome.warnings) == 2
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Report Validation
|
||||
# ============================================================================
|
||||
|
||||
class TestReportValidation:
    """Exercise validate_report_completeness() across valid and invalid reports."""

    def test_valid_report_passes(self):
        """A long report with a header validates without errors."""
        body = "# Market Analysis\n\n" + "This is a comprehensive report. " * 50

        outcome = validate_report_completeness(body, min_length=500)

        assert outcome.is_valid is True
        assert len(outcome.errors) == 0
        assert outcome.metrics["length"] > 500

    def test_none_report_fails(self):
        """None is rejected and the first error mentions it."""
        outcome = validate_report_completeness(None)

        assert outcome.is_valid is False
        assert "None" in outcome.errors[0]

    def test_empty_report_fails(self):
        """An empty string is rejected as empty."""
        outcome = validate_report_completeness("")

        assert outcome.is_valid is False
        assert "empty" in outcome.errors[0].lower()

    def test_short_report_fails(self):
        """A report shorter than min_length is rejected."""
        tiny = "Too short"

        outcome = validate_report_completeness(tiny, min_length=500)

        assert outcome.is_valid is False
        assert any("minimum" in message.lower() for message in outcome.errors)
        assert outcome.metrics["length"] < 500

    def test_wrong_type_fails(self):
        """A non-string report is rejected with a type error message."""
        outcome = validate_report_completeness(123)

        assert outcome.is_valid is False
        assert "string" in outcome.errors[0].lower()

    def test_markdown_table_detection(self):
        """A report containing a markdown table satisfies the table requirement."""
        tabular = """
# Analysis

| Metric | Value |
|--------|-------|
| Price | $100 |
| Volume | 1M |
""" + "Additional text. " * 50

        outcome = validate_report_completeness(
            tabular,
            min_length=200,
            require_markdown_tables=True,
        )

        assert outcome.is_valid is True
        assert outcome.metrics["markdown_tables"] > 0

    def test_missing_markdown_table_fails_when_required(self):
        """Requiring tables fails a report that has none."""
        tableless = "# Analysis\n\n" + "No tables here. " * 50

        outcome = validate_report_completeness(
            tableless,
            min_length=200,
            require_markdown_tables=True,
        )

        assert outcome.is_valid is False
        assert any("table" in message.lower() for message in outcome.errors)

    def test_section_header_detection(self):
        """Headers at several levels are all counted."""
        sectioned = """
# Main Title
## Subsection
### Details

Content here.
""" + "More content. " * 50

        outcome = validate_report_completeness(
            sectioned,
            min_length=200,
            require_sections=True,
        )

        assert outcome.is_valid is True
        assert outcome.metrics["section_headers"] >= 3

    def test_missing_sections_fails_when_required(self):
        """Requiring sections fails a report without any header."""
        flat = "Just plain text. " * 50

        outcome = validate_report_completeness(
            flat,
            min_length=200,
            require_sections=True,
        )

        assert outcome.is_valid is False
        assert any("section" in message.lower() for message in outcome.errors)

    def test_short_report_warning(self):
        """A report above min_length but below 1.5x min_length draws a warning."""
        # ~680 chars: passes the 500 minimum, stays under the 750 comfort threshold
        borderline = "Short but valid. " * 40

        outcome = validate_report_completeness(borderline, min_length=500)

        assert outcome.is_valid is True
        assert len(outcome.warnings) > 0
        assert any("short" in note.lower() for note in outcome.warnings)

    def test_bullet_point_detection(self):
        """Dash and asterisk bullets set the has_bullet_points metric."""
        bulleted = """
# Analysis

- Point 1
- Point 2
* Point 3

""" + "Additional content. " * 50

        outcome = validate_report_completeness(bulleted, min_length=200)

        assert outcome.metrics["has_bullet_points"] is True

    def test_unstructured_content_warning(self):
        """Plain prose without bullets or tables is valid but warned about."""
        prose = "Just a long stream of text without any structure. " * 50

        outcome = validate_report_completeness(prose, min_length=500)

        assert outcome.is_valid is True
        assert any("structured" in note.lower() for note in outcome.warnings)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Decision Validation
|
||||
# ============================================================================
|
||||
|
||||
class TestDecisionValidation:
    """Exercise validate_decision_quality() signal extraction and warnings."""

    def test_valid_buy_decision(self):
        """A BUY decision with an explanation validates."""
        text = "BUY: Strong fundamentals and positive momentum"

        verdict = validate_decision_quality(text)

        assert verdict.is_valid is True
        assert verdict.metrics["signal"] == "BUY"
        assert verdict.metrics["has_reasoning"] is True

    def test_valid_sell_decision(self):
        """A SELL decision validates and reports the SELL signal."""
        text = "SELL: Overvalued with deteriorating fundamentals"

        verdict = validate_decision_quality(text)

        assert verdict.is_valid is True
        assert verdict.metrics["signal"] == "SELL"

    def test_valid_hold_decision(self):
        """A HOLD decision validates and reports the HOLD signal."""
        text = "HOLD: Mixed signals, awaiting clarity"

        verdict = validate_decision_quality(text)

        assert verdict.is_valid is True
        assert verdict.metrics["signal"] == "HOLD"

    def test_case_insensitive_signal_extraction(self):
        """The signal is found regardless of letter case and position."""
        variants = (
            "buy the stock",
            "BUY the stock",
            "Buy the stock",
            "We should buy",
        )

        for text in variants:
            verdict = validate_decision_quality(text)
            assert verdict.metrics["signal"] == "BUY"

    def test_none_decision_fails(self):
        """None is rejected and the first error mentions it."""
        verdict = validate_decision_quality(None)

        assert verdict.is_valid is False
        assert "None" in verdict.errors[0]

    def test_empty_decision_fails(self):
        """An empty string is rejected as empty."""
        verdict = validate_decision_quality("")

        assert verdict.is_valid is False
        assert "empty" in verdict.errors[0].lower()

    def test_no_signal_fails(self):
        """Text without BUY/SELL/HOLD is rejected and signal is None."""
        text = "This is a decision without a clear signal"

        verdict = validate_decision_quality(text)

        assert verdict.is_valid is False
        assert any("signal" in message.lower() for message in verdict.errors)
        assert verdict.metrics["signal"] is None

    def test_wrong_type_fails(self):
        """A non-string decision is rejected with a type error message."""
        verdict = validate_decision_quality({"decision": "BUY"})

        assert verdict.is_valid is False
        assert "string" in verdict.errors[0].lower()

    def test_multiple_signals_warning(self):
        """Several different signals yield the first one plus a warning."""
        text = "BUY or maybe SELL, hard to decide, could HOLD"

        verdict = validate_decision_quality(text)

        # The first signal in the text wins
        assert verdict.metrics["signal"] == "BUY"
        # The disagreement is surfaced as a warning
        assert len(verdict.warnings) > 0
        assert any("conflicting" in note.lower() for note in verdict.warnings)

    def test_short_decision_warning(self):
        """A bare signal is valid but flagged as short."""
        text = "BUY"

        verdict = validate_decision_quality(text)

        assert verdict.is_valid is True
        assert len(verdict.warnings) > 0
        assert any("short" in note.lower() for note in verdict.warnings)

    def test_decision_with_reasoning_markers(self):
        """Colon, period, or enough words each count as reasoning."""
        samples = (
            "BUY: Strong fundamentals",
            "SELL. Company is overvalued.",
            "HOLD because market is uncertain",
        )

        for text in samples:
            verdict = validate_decision_quality(text)
            assert verdict.metrics["has_reasoning"] is True

    def test_signal_count_metric(self):
        """signal_count tallies every signal occurrence, case-insensitively."""
        text = "BUY BUY BUY! Strong signal to buy"

        verdict = validate_decision_quality(text)

        assert verdict.metrics["signal_count"] == 4
        assert verdict.metrics["signal"] == "BUY"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Debate State Validation
|
||||
# ============================================================================
|
||||
|
||||
class TestDebateStateValidation:
    """Exercise validate_debate_state() for both debate types."""

    def test_valid_invest_debate_state(self):
        """A fully populated invest debate validates and yields metrics."""
        payload = dict(
            history="Round 1: Bull argues...\nRound 2: Bear counters...",
            count=2,
            judge_decision="BUY: Bulls made stronger case",
            bull_history="Bull argument",
            bear_history="Bear argument",
        )

        outcome = validate_debate_state(payload, debate_type="invest")

        assert outcome.is_valid is True
        assert outcome.metrics["history_length"] > 0
        assert outcome.metrics["count"] == 2
        assert outcome.metrics["judge_signal"] == "BUY"

    def test_valid_risk_debate_state(self):
        """A fully populated risk debate validates."""
        payload = dict(
            history="Round 1: Risky argues...\nRound 2: Safe counters...",
            count=2,
            judge_decision="HOLD: Balanced risk profile",
            risky_history="Risky argument",
            safe_history="Safe argument",
            neutral_history="Neutral argument",
        )

        outcome = validate_debate_state(payload, debate_type="risk")

        assert outcome.is_valid is True
        assert outcome.metrics["count"] == 2

    def test_none_debate_state_fails(self):
        """None is rejected and the first error mentions it."""
        outcome = validate_debate_state(None)

        assert outcome.is_valid is False
        assert "None" in outcome.errors[0]

    def test_wrong_type_fails(self):
        """A non-dict debate state is rejected."""
        outcome = validate_debate_state("not a dict")

        assert outcome.is_valid is False
        assert "dict" in outcome.errors[0].lower()

    def test_missing_required_fields_fails(self):
        """Omitting count and judge_decision is reported as missing fields."""
        partial = dict(history="Some history")

        outcome = validate_debate_state(partial)

        assert outcome.is_valid is False
        assert any("missing" in message.lower() for message in outcome.errors)

    def test_invalid_debate_type_fails(self):
        """An unrecognized debate_type is rejected."""
        payload = dict(history="History", count=1, judge_decision="BUY")

        outcome = validate_debate_state(payload, debate_type="unknown")

        assert outcome.is_valid is False
        assert "unknown" in outcome.errors[0].lower()

    def test_empty_history_warning(self):
        """An empty history is valid but warned about."""
        payload = dict(history="", count=0, judge_decision="HOLD")

        outcome = validate_debate_state(payload)

        assert outcome.is_valid is True
        assert any("empty" in note.lower() for note in outcome.warnings)

    def test_negative_count_fails(self):
        """A negative count is rejected."""
        payload = dict(history="History", count=-1, judge_decision="BUY")

        outcome = validate_debate_state(payload)

        assert outcome.is_valid is False
        assert any("negative" in message.lower() for message in outcome.errors)

    def test_high_count_warning(self):
        """A count above the threshold is valid but warned about."""
        payload = dict(history="Long debate...", count=15, judge_decision="SELL")

        outcome = validate_debate_state(payload)

        assert outcome.is_valid is True
        assert any("high" in note.lower() for note in outcome.warnings)

    def test_invalid_judge_decision_warning(self):
        """A judge decision without a signal only produces a warning."""
        payload = dict(
            history="History",
            count=2,
            judge_decision="No clear signal here",
        )

        outcome = validate_debate_state(payload)

        assert outcome.is_valid is True
        assert len(outcome.warnings) > 0

    def test_optional_fields_metric(self):
        """Present optional fields are counted in the metrics."""
        payload = dict(
            history="History",
            count=1,
            judge_decision="BUY",
            bull_history="Bull",
            bear_history="Bear",
        )

        outcome = validate_debate_state(payload, debate_type="invest")

        assert outcome.metrics["optional_fields_present"] >= 2

    def test_wrong_history_type_fails(self):
        """A non-string history is rejected."""
        payload = dict(history=123, count=1, judge_decision="BUY")

        outcome = validate_debate_state(payload)

        assert outcome.is_valid is False
        assert any("string" in message.lower() for message in outcome.errors)

    def test_wrong_count_type_fails(self):
        """A non-int count is rejected."""
        payload = dict(history="History", count="two", judge_decision="BUY")

        outcome = validate_debate_state(payload)

        assert outcome.is_valid is False
        assert any("int" in message.lower() for message in outcome.errors)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Agent State Validation
|
||||
# ============================================================================
|
||||
|
||||
class TestAgentStateValidation:
    """Exercise validate_agent_state() on complete and partial states."""

    def test_valid_complete_agent_state(self):
        """A fully populated state validates and exposes summary metrics."""
        invest_debate = dict(
            history="Debate history",
            count=3,
            judge_decision="BUY: Strong case",
        )
        risk_debate = dict(
            history="Risk debate",
            count=2,
            judge_decision="HOLD: Moderate risk",
        )
        snapshot = dict(
            company_of_interest="AAPL",
            trade_date="2024-01-15",
            market_report="# Market Analysis\n\n" + "Detailed analysis. " * 100,
            sentiment_report="# Sentiment Report\n\n" + "Social sentiment. " * 100,
            news_report="# News Report\n\n" + "Latest news. " * 100,
            fundamentals_report="# Fundamentals\n\n" + "Financial data. " * 100,
            investment_debate_state=invest_debate,
            risk_debate_state=risk_debate,
            final_trade_decision="BUY: All signals align positively",
        )

        outcome = validate_agent_state(snapshot)

        assert outcome.is_valid is True
        assert outcome.metrics["company_of_interest"] == "AAPL"
        assert outcome.metrics["trade_date"] == "2024-01-15"
        assert outcome.metrics["reports_present"] == 4
        assert outcome.metrics["final_signal"] == "BUY"

    def test_none_state_fails(self):
        """None is rejected and the first error mentions it."""
        outcome = validate_agent_state(None)

        assert outcome.is_valid is False
        assert "None" in outcome.errors[0]

    def test_wrong_type_fails(self):
        """A non-dict state is rejected."""
        outcome = validate_agent_state("not a dict")

        assert outcome.is_valid is False
        assert "dict" in outcome.errors[0].lower()

    def test_missing_company_fails(self):
        """A state without company_of_interest is rejected."""
        snapshot = dict(trade_date="2024-01-15")

        outcome = validate_agent_state(snapshot)

        assert outcome.is_valid is False
        assert any("company" in message.lower() for message in outcome.errors)

    def test_missing_trade_date_fails(self):
        """A state without trade_date is rejected."""
        snapshot = dict(company_of_interest="AAPL")

        outcome = validate_agent_state(snapshot)

        assert outcome.is_valid is False
        assert any("trade_date" in message.lower() for message in outcome.errors)

    def test_incomplete_reports_warning(self):
        """Supplying only one of the four reports is valid but warned about."""
        snapshot = dict(
            company_of_interest="AAPL",
            trade_date="2024-01-15",
            # Only the market report is supplied
            market_report="Market analysis. " * 100,
        )

        outcome = validate_agent_state(snapshot)

        # The basic identifying fields are present, so the state is valid
        assert outcome.is_valid is True
        # The absent reports surface as warnings
        assert len(outcome.warnings) > 0
        assert outcome.metrics["reports_present"] < 4

    def test_invalid_report_warning(self):
        """A too-short report is reported as a warning naming the field."""
        snapshot = dict(
            company_of_interest="AAPL",
            trade_date="2024-01-15",
            market_report="Too short",  # under the minimum length
        )

        outcome = validate_agent_state(snapshot)

        assert outcome.is_valid is True
        assert any("market_report" in note.lower() for note in outcome.warnings)

    def test_invalid_invest_debate_warning(self):
        """An investment debate lacking required fields yields a warning."""
        snapshot = dict(
            company_of_interest="AAPL",
            trade_date="2024-01-15",
            # count and judge_decision are deliberately left out
            investment_debate_state=dict(history="History"),
        )

        outcome = validate_agent_state(snapshot)

        assert outcome.is_valid is True
        assert any("investment debate" in note.lower() for note in outcome.warnings)

    def test_invalid_risk_debate_warning(self):
        """A malformed risk debate yields a warning."""
        snapshot = dict(
            company_of_interest="AAPL",
            trade_date="2024-01-15",
            risk_debate_state=dict(count=-1),  # invalid debate state
        )

        outcome = validate_agent_state(snapshot)

        assert outcome.is_valid is True
        assert any("risk debate" in note.lower() for note in outcome.warnings)

    def test_invalid_final_decision_warning(self):
        """A final decision without a signal yields a warning."""
        snapshot = dict(
            company_of_interest="AAPL",
            trade_date="2024-01-15",
            final_trade_decision="No clear signal",
        )

        outcome = validate_agent_state(snapshot)

        assert outcome.is_valid is True
        assert any("final decision" in note.lower() for note in outcome.warnings)

    def test_incomplete_state_warning(self):
        """A state with no debates or decision is flagged as incomplete."""
        snapshot = dict(
            company_of_interest="AAPL",
            trade_date="2024-01-15",
        )

        outcome = validate_agent_state(snapshot)

        assert outcome.is_valid is True
        assert any("incomplete" in note.lower() for note in outcome.warnings)

    def test_reports_count_metrics(self):
        """Present and expected report counts are both recorded."""
        filler = "Report. " * 100
        snapshot = dict(
            company_of_interest="AAPL",
            trade_date="2024-01-15",
            market_report=filler,
            sentiment_report=filler,
        )

        outcome = validate_agent_state(snapshot)

        assert outcome.metrics["reports_present"] == 2
        assert outcome.metrics["total_reports_expected"] == 4
|
||||
|
|
@ -0,0 +1,453 @@
|
|||
"""
|
||||
Output validation utilities for agent outputs.
|
||||
|
||||
This module provides validation functions for:
|
||||
- Report completeness (length, structure, markdown formatting)
|
||||
- Decision quality (signal extraction, reasoning clarity)
|
||||
- Debate state coherence (history tracking, judge decisions)
|
||||
- Complete agent state validation
|
||||
|
||||
All validators return ValidationResult with actionable feedback.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Dict, Any
|
||||
import re
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """
    Outcome of a single validation pass.

    Attributes:
        is_valid: Whether the validated object passed; cleared by add_error()
        errors: Messages describing validation failures
        warnings: Messages describing quality concerns that do not fail validation
        metrics: Measurements collected during validation, keyed by name
    """
    is_valid: bool
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    metrics: Dict[str, Any] = field(default_factory=dict)

    def add_error(self, message: str) -> None:
        """Record a failure message and flag the result as invalid."""
        self.is_valid = False
        self.errors.append(message)

    def add_warning(self, message: str) -> None:
        """Record a quality concern; validity is left as it was."""
        self.warnings.append(message)

    def add_metric(self, key: str, value: Any) -> None:
        """Store a measurement under ``key``, replacing any earlier value."""
        self.metrics.update({key: value})
|
||||
|
||||
|
||||
def validate_report_completeness(
    report: Optional[str],
    min_length: int = 500,
    require_markdown_tables: bool = False,
    require_sections: bool = False,
) -> ValidationResult:
    """
    Validate that a report is complete and well-structured.

    A None, non-string, or blank report fails immediately. Otherwise the
    stripped length, table-row count, header count and bullet presence are
    measured and recorded as metrics, and length/structure requirements are
    turned into errors or warnings.

    Args:
        report: The report text to validate
        min_length: Minimum stripped character count required (default: 500)
        require_markdown_tables: Whether at least one table-like line
            (text enclosed by two ``|`` characters) is required
        require_sections: Whether at least one markdown header
            (``#`` through ``######``) is required

    Returns:
        ValidationResult with errors, warnings, and the metrics
        ``length``, ``markdown_tables``, ``section_headers`` and
        ``has_bullet_points`` (only ``length`` is set for an empty report,
        and none are set for a None or non-string report)

    Example:
        >>> result = validate_report_completeness("# Report\\n\\nThis is too short")
        >>> assert not result.is_valid
        >>> assert "below minimum" in result.errors[0].lower()
    """
    result = ValidationResult(is_valid=True)

    # Check if report exists
    if report is None:
        result.add_error("Report is None")
        return result

    if not isinstance(report, str):
        result.add_error(f"Report must be string, got {type(report).__name__}")
        return result

    # Check length (surrounding whitespace does not count)
    report_length = len(report.strip())
    result.add_metric("length", report_length)

    if report_length == 0:
        result.add_error("Report is empty")
        return result

    if report_length < min_length:
        result.add_error(
            f"Report length ({report_length}) below minimum ({min_length})"
        )

    # Check for markdown tables: each line containing a |...| span counts once
    markdown_tables = re.findall(r'\|.*\|', report)
    result.add_metric("markdown_tables", len(markdown_tables))

    if require_markdown_tables and len(markdown_tables) == 0:
        result.add_error("Report missing required markdown tables")

    # Check for section headers (allow optional leading indentation).
    # [ \t] is used instead of \s because \s also matches newlines, which
    # would let a lone "#" line borrow the next line's text as its title.
    section_headers = re.findall(
        r'^[ \t]*#{1,6}[ \t]+.+$', report, re.MULTILINE
    )
    result.add_metric("section_headers", len(section_headers))

    if require_sections and len(section_headers) == 0:
        result.add_error("Report missing required section headers")

    # Quality warnings
    if report_length < min_length * 1.5:
        result.add_warning(
            f"Report is relatively short ({report_length} chars). "
            f"Consider adding more detail."
        )

    # Check for basic structure indicators ("-" or "*" bullets); the marker
    # and the whitespace after it must sit on the same line.
    has_bullet_points = bool(
        re.search(r'^[ \t]*[-*][ \t]+', report, re.MULTILINE)
    )
    result.add_metric("has_bullet_points", has_bullet_points)

    if not has_bullet_points and not markdown_tables:
        result.add_warning("Report lacks structured content (no bullets or tables)")

    return result
|
||||
|
||||
|
||||
def validate_decision_quality(decision: Optional[str]) -> ValidationResult:
    """
    Validate trading decision quality and extract signal.

    Validates:
    - Decision is not None/empty and is a string
    - Contains a BUY/SELL/HOLD signal as a whole word (case-insensitive)
    - Has reasoning/explanation (heuristic: contains ':' or '.', or has
      at least five words) - otherwise a warning
    - Signal is unambiguous - several different signals produce a warning
      and the first occurrence is used

    Args:
        decision: The decision text to validate

    Returns:
        ValidationResult. On success the metrics hold ``length``,
        ``signal`` (upper-cased), ``signal_count`` and ``has_reasoning``.
        When no signal is found, ``signal`` is None and validation fails.

    Example:
        >>> result = validate_decision_quality("BUY: Strong fundamentals")
        >>> assert result.is_valid
        >>> assert result.metrics["signal"] == "BUY"
    """
    result = ValidationResult(is_valid=True)

    # Check if decision exists
    if decision is None:
        result.add_error("Decision is None")
        return result

    if not isinstance(decision, str):
        result.add_error(f"Decision must be string, got {type(decision).__name__}")
        return result

    decision_clean = decision.strip()
    if not decision_clean:
        result.add_error("Decision is empty")
        return result

    result.add_metric("length", len(decision_clean))

    # Extract trading signal (case-insensitive, whole words only)
    signal_pattern = r'\b(BUY|SELL|HOLD)\b'
    matches = re.findall(signal_pattern, decision_clean, re.IGNORECASE)

    if not matches:
        result.add_error(
            "No clear trading signal found (expected BUY, SELL, or HOLD)"
        )
        result.add_metric("signal", None)
        return result

    # Get first signal and normalize to uppercase
    signal = matches[0].upper()
    result.add_metric("signal", signal)
    result.add_metric("signal_count", len(matches))

    # Warn if multiple conflicting signals
    unique_signals = {m.upper() for m in matches}
    if len(unique_signals) > 1:
        # Sort before rendering: interpolating the set directly would make
        # the message text depend on set iteration order.
        listed_signals = ", ".join(sorted(unique_signals))
        result.add_warning(
            f"Multiple conflicting signals found: {listed_signals}. "
            f"Using first occurrence: {signal}"
        )

    # Check for reasoning: punctuation that usually introduces an
    # explanation, or simply enough words to carry one
    has_reasoning = (
        ':' in decision_clean
        or '.' in decision_clean
        or len(decision_clean.split()) >= 5
    )

    result.add_metric("has_reasoning", has_reasoning)

    if not has_reasoning:
        result.add_warning(
            "Decision lacks clear reasoning or explanation"
        )

    # Check decision length
    if len(decision_clean) < 20:
        result.add_warning(
            f"Decision is very short ({len(decision_clean)} chars). "
            f"Consider adding more rationale."
        )

    return result
|
||||
|
||||
|
||||
def validate_debate_state(
    debate_state: Optional[Dict[str, Any]],
    debate_type: str = "invest",
) -> ValidationResult:
    """
    Validate debate state structure and coherence.

    Validates:
    - State is a dict and debate_type is "invest" or "risk"
    - Required keys present (history, count, judge_decision)
    - History is a string (blank history is only a warning)
    - Count is a non-negative int (bool is rejected; > 10 is a warning)
    - Judge decision is a string; a blank one or one that fails
      validate_decision_quality() is only a warning

    A required key whose value is None passes the presence check and is
    not inspected further.

    Args:
        debate_state: The debate state dictionary to validate
        debate_type: Type of debate ("invest" or "risk")

    Returns:
        ValidationResult with debate metrics (``history_length``,
        ``count``, ``judge_signal`` when they validate, and
        ``optional_fields_present``)

    Example:
        >>> state = {"history": "Round 1...", "count": 1, "judge_decision": "BUY"}
        >>> result = validate_debate_state(state)
        >>> assert result.is_valid
    """
    result = ValidationResult(is_valid=True)

    # Check if state exists
    if debate_state is None:
        result.add_error("Debate state is None")
        return result

    if not isinstance(debate_state, dict):
        result.add_error(
            f"Debate state must be dict, got {type(debate_state).__name__}"
        )
        return result

    # Both debate types share the same required keys; only the optional
    # keys differ.
    required_fields = ["history", "count", "judge_decision"]
    if debate_type == "invest":
        optional_fields = ["bull_history", "bear_history", "current_response"]
    elif debate_type == "risk":
        optional_fields = [
            "risky_history",
            "safe_history",
            "neutral_history",
            "latest_speaker",
            "current_risky_response",
            "current_safe_response",
            "current_neutral_response",
        ]
    else:
        result.add_error(f"Unknown debate type: {debate_type}")
        return result

    # Check required fields
    missing_fields = [f for f in required_fields if f not in debate_state]
    if missing_fields:
        result.add_error(f"Missing required fields: {missing_fields}")
        return result

    # Validate history
    history = debate_state.get("history")
    if history is not None:
        if not isinstance(history, str):
            result.add_error(
                f"History must be string, got {type(history).__name__}"
            )
        elif not history.strip():
            result.add_warning("History is empty")
        else:
            result.add_metric("history_length", len(history))

    # Validate count
    count = debate_state.get("count")
    if count is not None:
        # bool is a subclass of int, so True/False must be excluded
        # explicitly or they would be accepted as counts of 1/0.
        if isinstance(count, bool) or not isinstance(count, int):
            result.add_error(f"Count must be int, got {type(count).__name__}")
        elif count < 0:
            result.add_error(f"Count cannot be negative: {count}")
        else:
            result.add_metric("count", count)

            # Warn if debate went too long
            if count > 10:
                result.add_warning(
                    f"Debate count is very high ({count}). "
                    f"May indicate convergence issues."
                )

    # Validate judge decision
    judge_decision = debate_state.get("judge_decision")
    if judge_decision is not None:
        if not isinstance(judge_decision, str):
            result.add_error(
                f"Judge decision must be string, got {type(judge_decision).__name__}"
            )
        elif not judge_decision.strip():
            result.add_warning("Judge decision is empty")
        else:
            # Decision-quality failures are downgraded to a warning here:
            # they do not invalidate the debate state itself.
            decision_result = validate_decision_quality(judge_decision)
            if not decision_result.is_valid:
                result.add_warning(
                    f"Judge decision has quality issues: "
                    f"{', '.join(decision_result.errors)}"
                )
            else:
                result.add_metric("judge_signal", decision_result.metrics.get("signal"))

    # Check optional fields for completeness
    present_optional = [f for f in optional_fields if f in debate_state]
    result.add_metric("optional_fields_present", len(present_optional))

    return result
|
||||
|
||||
|
||||
def validate_agent_state(state: Optional[Dict[str, Any]]) -> ValidationResult:
    """
    Run every output validator over a full agent state dictionary.

    The checks run in this order:
        1. ``state`` is a non-None dict (otherwise an error is recorded and
           the function returns immediately).
        2. ``company_of_interest`` and ``trade_date`` are present and truthy
           (each missing one records an error).
        3. Each of the four analyst reports that is present is passed to
           ``validate_report_completeness``; its errors are surfaced here as
           a warning, and a warning is added when fewer than four are present.
        4. ``investment_debate_state`` and ``risk_debate_state``, when
           present, are passed to ``validate_debate_state``; their errors are
           surfaced as warnings.
        5. ``final_trade_decision``, when present, is passed to
           ``validate_decision_quality``; its errors are surfaced as a
           warning, otherwise the extracted signal is stored as a metric.
        6. A warning is added when neither debate state is present.

    Args:
        state: The complete agent state dictionary, or None.

    Returns:
        ValidationResult carrying the collected errors, warnings and metrics
        (``company_of_interest``, ``trade_date``, ``reports_present``,
        ``total_reports_expected``, ``investment_debate_valid``,
        ``risk_debate_valid``, ``final_signal`` -- each only when applicable).

    Example:
        >>> state = {
        ...     "company_of_interest": "AAPL",
        ...     "trade_date": "2024-01-15",
        ...     "market_report": "Market analysis..." * 100,
        ... }
        >>> result = validate_agent_state(state)
        >>> assert "company_of_interest" in result.metrics
    """
    outcome = ValidationResult(is_valid=True)

    # Guard clauses: nothing else can be inspected without a dict.
    if state is None:
        outcome.add_error("Agent state is None")
        return outcome
    if not isinstance(state, dict):
        outcome.add_error(f"Agent state must be dict, got {type(state).__name__}")
        return outcome

    # Identity fields: a falsy value counts as missing.
    ticker = state.get("company_of_interest")
    if ticker:
        outcome.add_metric("company_of_interest", ticker)
    else:
        outcome.add_error("Missing company_of_interest")

    as_of = state.get("trade_date")
    if as_of:
        outcome.add_metric("trade_date", as_of)
    else:
        outcome.add_error("Missing trade_date")

    # Analyst reports: absent ones are only counted, present ones are
    # validated and any problems are downgraded to a warning here.
    expected_reports = (
        "market_report",
        "sentiment_report",
        "news_report",
        "fundamentals_report",
    )
    found = 0
    for name in expected_reports:
        body = state.get(name)
        if not body:
            continue
        found += 1
        check = validate_report_completeness(
            body,
            min_length=500,
            require_markdown_tables=False,
            require_sections=False,
        )
        if check.is_valid:
            continue
        outcome.add_warning(f"{name} has issues: {', '.join(check.errors)}")

    expected_total = len(expected_reports)
    outcome.add_metric("reports_present", found)
    outcome.add_metric("total_reports_expected", expected_total)
    if found < expected_total:
        outcome.add_warning(f"Only {found}/{expected_total} reports present")

    # Investment debate: validated only when a truthy state is supplied.
    invest_state = state.get("investment_debate_state")
    if invest_state:
        invest_check = validate_debate_state(invest_state, debate_type="invest")
        if not invest_check.is_valid:
            outcome.add_warning(
                f"Investment debate has issues: {', '.join(invest_check.errors)}"
            )
        outcome.add_metric("investment_debate_valid", invest_check.is_valid)

    # Risk debate: same treatment as the investment debate.
    risk_state = state.get("risk_debate_state")
    if risk_state:
        risk_check = validate_debate_state(risk_state, debate_type="risk")
        if not risk_check.is_valid:
            outcome.add_warning(
                f"Risk debate has issues: {', '.join(risk_check.errors)}"
            )
        outcome.add_metric("risk_debate_valid", risk_check.is_valid)

    # Final decision: the signal metric is recorded only for a valid decision.
    verdict_text = state.get("final_trade_decision")
    if verdict_text:
        verdict = validate_decision_quality(verdict_text)
        if verdict.is_valid:
            outcome.add_metric("final_signal", verdict.metrics.get("signal"))
        else:
            outcome.add_warning(
                f"Final decision has issues: {', '.join(verdict.errors)}"
            )

    # Overall completeness: flag a state that carries no debate at all.
    if not (invest_state or risk_state):
        outcome.add_warning(
            "State appears incomplete: no debate states present"
        )

    return outcome
|
||||
Loading…
Reference in New Issue