"""
|
|
Output validation utilities for agent outputs.
|
|
|
|
This module provides validation functions for:
|
|
- Report completeness (length, structure, markdown formatting)
|
|
- Decision quality (signal extraction, reasoning clarity)
|
|
- Debate state coherence (history tracking, judge decisions)
|
|
- Complete agent state validation
|
|
|
|
All validators return ValidationResult with actionable feedback.
|
|
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import List, Optional, Dict, Any
|
|
import re
|
|
|
|
|
|
@dataclass
class ValidationResult:
    """
    Outcome of a validation check, carrying actionable feedback.

    Attributes:
        is_valid: True while no error has been recorded
        errors: Messages describing validation failures
        warnings: Messages describing non-fatal quality concerns
        metrics: Measured values gathered during validation
    """
    is_valid: bool
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    metrics: Dict[str, Any] = field(default_factory=dict)

    def add_error(self, message: str) -> None:
        """Record a failure message and flip the result to invalid."""
        self.is_valid = False
        self.errors.append(message)

    def add_warning(self, message: str) -> None:
        """Record a quality concern; validity is left untouched."""
        self.warnings.append(message)

    def add_metric(self, key: str, value: Any) -> None:
        """Store a measured value under *key*."""
        self.metrics[key] = value
def validate_report_completeness(
    report: Optional[str],
    min_length: int = 500,
    require_markdown_tables: bool = False,
    require_sections: bool = False,
) -> ValidationResult:
    """
    Validate that a report is complete and well-structured.

    Args:
        report: The report text to validate
        min_length: Minimum character count required (default: 500)
        require_markdown_tables: Whether to require markdown tables
        require_sections: Whether to require section headers (##)

    Returns:
        ValidationResult with errors, warnings, and metrics

    Example:
        >>> result = validate_report_completeness("# Report\\n\\nThis is too short")
        >>> assert not result.is_valid
        >>> assert "below minimum" in result.errors[0].lower()
    """
    result = ValidationResult(is_valid=True)

    # Existence / type guards: bail out early with a single clear error.
    if report is None:
        result.add_error("Report is None")
        return result

    if not isinstance(report, str):
        result.add_error(f"Report must be string, got {type(report).__name__}")
        return result

    # Length is measured on stripped text so whitespace padding cannot
    # satisfy the minimum.
    report_length = len(report.strip())
    result.add_metric("length", report_length)

    if report_length == 0:
        result.add_error("Report is empty")
        return result

    if report_length < min_length:
        result.add_error(
            f"Report length ({report_length}) below minimum ({min_length})"
        )

    # Markdown tables: any line segment containing two '|' characters counts
    # as one table row ('.' does not cross newlines without DOTALL).
    markdown_tables = re.findall(r'\|.*\|', report)
    result.add_metric("markdown_tables", len(markdown_tables))

    if require_markdown_tables and len(markdown_tables) == 0:
        result.add_error("Report missing required markdown tables")

    # Section headers: '#' through '######' with optional leading whitespace.
    section_headers = re.findall(r'^\s*#{1,6}\s+.+$', report, re.MULTILINE)
    result.add_metric("section_headers", len(section_headers))

    if require_sections and len(section_headers) == 0:
        result.add_error("Report missing required section headers")

    # Quality warning for reports that PASSED the minimum but are still on
    # the short side. (Previously this also fired for sub-minimum reports,
    # redundantly duplicating the length error above.)
    if min_length <= report_length < min_length * 1.5:
        result.add_warning(
            f"Report is relatively short ({report_length} chars). "
            f"Consider adding more detail."
        )

    # Structure indicators: bullets or tables suggest organized content.
    has_bullet_points = bool(re.search(r'^\s*[-*]\s+', report, re.MULTILINE))
    result.add_metric("has_bullet_points", has_bullet_points)

    if not has_bullet_points and not markdown_tables:
        result.add_warning("Report lacks structured content (no bullets or tables)")

    return result
def validate_decision_quality(decision: Optional[str]) -> ValidationResult:
    """
    Validate trading decision quality and extract its signal.

    Checks that the decision text exists, contains an unambiguous
    BUY/SELL/HOLD signal, and carries some reasoning. The extracted
    signal is exposed through ``result.metrics["signal"]``.

    Args:
        decision: The decision text to validate

    Returns:
        ValidationResult with extracted signal in metrics

    Example:
        >>> result = validate_decision_quality("BUY: Strong fundamentals")
        >>> assert result.is_valid
        >>> assert result.metrics["signal"] == "BUY"
    """
    result = ValidationResult(is_valid=True)

    # Guard clauses: None, wrong type, then empty after stripping.
    if decision is None:
        result.add_error("Decision is None")
        return result

    if not isinstance(decision, str):
        result.add_error(f"Decision must be string, got {type(decision).__name__}")
        return result

    text = decision.strip()
    if not text:
        result.add_error("Decision is empty")
        return result

    result.add_metric("length", len(text))

    # Pull out every BUY/SELL/HOLD token, ignoring case.
    found = re.findall(r'\b(BUY|SELL|HOLD)\b', text, re.IGNORECASE)

    if not found:
        result.add_error(
            "No clear trading signal found (expected BUY, SELL, or HOLD)"
        )
        result.add_metric("signal", None)
        return result

    # The first occurrence wins; normalize to uppercase.
    signal = found[0].upper()
    result.add_metric("signal", signal)
    result.add_metric("signal_count", len(found))

    # Flag ambiguity when more than one distinct signal appears.
    unique_signals = set(m.upper() for m in found)
    if len(unique_signals) > 1:
        result.add_warning(
            f"Multiple conflicting signals found: {unique_signals}. "
            f"Using first occurrence: {signal}"
        )

    # Reasoning heuristic: punctuation or at least five words.
    has_reasoning = (
        ':' in text
        or '.' in text
        or len(text.split()) >= 5
    )

    result.add_metric("has_reasoning", has_reasoning)

    if not has_reasoning:
        result.add_warning(
            "Decision lacks clear reasoning or explanation"
        )

    # Very short decisions are suspect even when a signal was found.
    if len(text) < 20:
        result.add_warning(
            f"Decision is very short ({len(text)} chars). "
            f"Consider adding more rationale."
        )

    return result
def validate_debate_state(
    debate_state: Optional[Dict[str, Any]],
    debate_type: str = "invest",
) -> ValidationResult:
    """
    Validate debate state structure and coherence.

    Validates:
    - Required fields present (history, count, judge_decision)
    - History is a string (empty history is only a warning)
    - Count is a non-negative int (very high counts draw a warning)
    - Judge decision, when present, is a non-empty string; its quality is
      checked via validate_decision_quality, with failures downgraded to
      warnings

    Args:
        debate_state: The debate state dictionary to validate
        debate_type: Type of debate ("invest" or "risk")

    Returns:
        ValidationResult with debate metrics

    Example:
        >>> state = {"history": "Round 1...", "count": 1, "judge_decision": "BUY"}
        >>> result = validate_debate_state(state)
        >>> assert result.is_valid
    """
    result = ValidationResult(is_valid=True)

    # Existence / type guards: fail fast with a single clear error.
    if debate_state is None:
        result.add_error("Debate state is None")
        return result

    if not isinstance(debate_state, dict):
        result.add_error(
            f"Debate state must be dict, got {type(debate_state).__name__}"
        )
        return result

    # The two debate types share required fields and differ only in which
    # optional fields are expected.
    if debate_type == "invest":
        required_fields = ["history", "count", "judge_decision"]
        optional_fields = ["bull_history", "bear_history", "current_response"]
    elif debate_type == "risk":
        required_fields = ["history", "count", "judge_decision"]
        optional_fields = [
            "risky_history",
            "safe_history",
            "neutral_history",
            "latest_speaker",
            "current_risky_response",
            "current_safe_response",
            "current_neutral_response",
        ]
    else:
        result.add_error(f"Unknown debate type: {debate_type}")
        return result

    # Any missing required key is fatal; the checks below assume presence.
    missing_fields = [f for f in required_fields if f not in debate_state]
    if missing_fields:
        result.add_error(f"Missing required fields: {missing_fields}")
        return result

    # Validate history.
    # NOTE(review): a required key present with an explicit None value skips
    # every check below with no error or warning — confirm that is intended.
    history = debate_state.get("history")
    if history is not None:
        if not isinstance(history, str):
            result.add_error(
                f"History must be string, got {type(history).__name__}"
            )
        elif not history.strip():
            result.add_warning("History is empty")
        else:
            result.add_metric("history_length", len(history))

    # Validate count (debate rounds); same None caveat as history.
    # NOTE(review): bool is a subclass of int, so True/False pass this check.
    count = debate_state.get("count")
    if count is not None:
        if not isinstance(count, int):
            result.add_error(f"Count must be int, got {type(count).__name__}")
        elif count < 0:
            result.add_error(f"Count cannot be negative: {count}")
        else:
            result.add_metric("count", count)

            # Flag debates that ran unusually long without converging.
            if count > 10:
                result.add_warning(
                    f"Debate count is very high ({count}). "
                    f"May indicate convergence issues."
                )

    # Validate judge decision. Quality problems are downgraded to warnings
    # so a weak verdict does not invalidate the whole debate state; only a
    # wrong type is treated as an error.
    judge_decision = debate_state.get("judge_decision")
    if judge_decision is not None:
        if isinstance(judge_decision, str):
            if judge_decision.strip():
                # Delegate signal/reasoning checks to the decision validator.
                decision_result = validate_decision_quality(judge_decision)
                if not decision_result.is_valid:
                    result.add_warning(
                        f"Judge decision has quality issues: "
                        f"{', '.join(decision_result.errors)}"
                    )
                else:
                    result.add_metric("judge_signal", decision_result.metrics.get("signal"))
            else:
                result.add_warning("Judge decision is empty")
        else:
            result.add_error(
                f"Judge decision must be string, got {type(judge_decision).__name__}"
            )

    # Record how many of the type-specific optional fields are populated.
    present_optional = [f for f in optional_fields if f in debate_state]
    result.add_metric("optional_fields_present", len(present_optional))

    return result
def validate_agent_state(state: Optional[Dict[str, Any]]) -> ValidationResult:
    """
    Validate complete agent state structure.

    Orchestrates the other validators to check:
    - Company and trade date present
    - Each analyst report (market/sentiment/news/fundamentals) is complete
    - Investment debate state valid
    - Risk debate state valid
    - Final decision quality

    Sub-validator failures are recorded as warnings on this result rather
    than errors; only missing top-level fields (or a bad state object)
    make validation fail outright.

    Args:
        state: The complete agent state dictionary

    Returns:
        ValidationResult with comprehensive validation

    Example:
        >>> state = {
        ...     "company_of_interest": "AAPL",
        ...     "trade_date": "2024-01-15",
        ...     "market_report": "Market analysis..." * 100,
        ... }
        >>> result = validate_agent_state(state)
        >>> assert "company_of_interest" in result.metrics
    """
    result = ValidationResult(is_valid=True)

    # Existence / type guards.
    if state is None:
        result.add_error("Agent state is None")
        return result

    if not isinstance(state, dict):
        result.add_error(f"Agent state must be dict, got {type(state).__name__}")
        return result

    # Basic identity fields: any falsy value (None, "") counts as missing.
    company = state.get("company_of_interest")
    if not company:
        result.add_error("Missing company_of_interest")
    else:
        result.add_metric("company_of_interest", company)

    trade_date = state.get("trade_date")
    if not trade_date:
        result.add_error("Missing trade_date")
    else:
        result.add_metric("trade_date", trade_date)

    # Analyst reports: each present (truthy) report is validated
    # individually, but problems surface only as warnings here.
    report_fields = [
        "market_report",
        "sentiment_report",
        "news_report",
        "fundamentals_report",
    ]

    reports_present = 0
    for report_field in report_fields:
        report = state.get(report_field)
        if report:
            reports_present += 1
            report_result = validate_report_completeness(
                report,
                min_length=500,
                require_markdown_tables=False,
                require_sections=False,
            )
            if not report_result.is_valid:
                result.add_warning(
                    f"{report_field} has issues: {', '.join(report_result.errors)}"
                )

    result.add_metric("reports_present", reports_present)
    result.add_metric("total_reports_expected", len(report_fields))

    if reports_present < len(report_fields):
        result.add_warning(
            f"Only {reports_present}/{len(report_fields)} reports present"
        )

    # Investment debate: validated only when present and truthy (an empty
    # dict is treated the same as absent).
    invest_debate = state.get("investment_debate_state")
    if invest_debate:
        invest_result = validate_debate_state(invest_debate, debate_type="invest")
        if not invest_result.is_valid:
            result.add_warning(
                f"Investment debate has issues: {', '.join(invest_result.errors)}"
            )
        result.add_metric("investment_debate_valid", invest_result.is_valid)

    # Risk debate: same treatment as the investment debate.
    risk_debate = state.get("risk_debate_state")
    if risk_debate:
        risk_result = validate_debate_state(risk_debate, debate_type="risk")
        if not risk_result.is_valid:
            result.add_warning(
                f"Risk debate has issues: {', '.join(risk_result.errors)}"
            )
        result.add_metric("risk_debate_valid", risk_result.is_valid)

    # Final decision: quality issues downgrade to a warning; the extracted
    # signal metric is surfaced only when the decision validates cleanly.
    final_decision = state.get("final_trade_decision")
    if final_decision:
        decision_result = validate_decision_quality(final_decision)
        if not decision_result.is_valid:
            result.add_warning(
                f"Final decision has issues: {', '.join(decision_result.errors)}"
            )
        else:
            result.add_metric("final_signal", decision_result.metrics.get("signal"))

    # Overall completeness check: warn when neither debate state exists.
    if not invest_debate and not risk_debate:
        result.add_warning(
            "State appears incomplete: no debate states present"
        )

    return result