TradingAgents/.claude/lib/hybrid_validator.py

385 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Hybrid Manifest Validator - Orchestrates GenAI and regex validation
This module provides a hybrid validation approach that tries GenAI validation
first and falls back to regex validation if the API key is missing or the
GenAI validation raises an error.
Validation Modes:
- AUTO: Try GenAI, fall back to regex if no API key or GenAI fails
- GENAI_ONLY: Use only GenAI (fail if no API key)
- REGEX_ONLY: Use only regex validation
Security Features:
- Path validation via security_utils
- Consistent error handling
- Audit logging
Usage:
from hybrid_validator import HybridManifestValidator, ValidationMode
# Auto mode (default)
validator = HybridManifestValidator(repo_root)
result = validator.validate()
# Explicit mode
validator = HybridManifestValidator(repo_root, mode=ValidationMode.REGEX_ONLY)
result = validator.validate()
# Convenience function
result = validate_manifest_alignment(repo_root, mode="auto")
Date: 2025-12-24
Related: Issue #160 - GenAI manifest alignment validation
Agent: implementer
"""
import json
import sys
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Dict, Any, List
# Import validators and the shared security helpers.
try:
    from plugins.autonomous_dev.lib.genai_manifest_validator import (
        GenAIManifestValidator,
        ManifestValidationResult as GenAIResult,
        ManifestIssue as GenAIIssue,
        IssueLevel as GenAILevel,
    )
    from plugins.autonomous_dev.lib.validate_manifest_doc_alignment import (
        validate_alignment as regex_validate_alignment,
    )
    from plugins.autonomous_dev.lib.validate_documentation_parity import (
        ParityReport,
        ParityIssue,
        ValidationLevel,
    )
    from plugins.autonomous_dev.lib.security_utils import (
        validate_path,
        audit_log,
        PROJECT_ROOT,
    )
except ImportError:
    # Fallback for testing: minimal stand-ins for the security_utils helpers.
    # NOTE(review): only PROJECT_ROOT, validate_path and audit_log are stubbed
    # here. If the failing import is one of the earlier ones, names such as
    # ParityReport or GenAIManifestValidator stay undefined and the class
    # definitions later in this module will raise NameError.
    PROJECT_ROOT = Path(__file__).parent.parent.parent.parent.resolve()

    def validate_path(path: Path, context: str, test_mode: bool = False) -> Path:
        """Fallback path validation.

        Args:
            path: Path to check.
            context: Label describing what the path is used for (unused here).
            test_mode: Accepted for call compatibility with the way this module
                invokes ``validate_path`` (it always passes ``test_mode=``);
                the fallback performs the same check either way.

        Returns:
            The resolved path.

        Raises:
            ValueError: If the path does not exist.
        """
        if not path.exists():
            raise ValueError(f"Path does not exist: {path}")
        return path.resolve()

    def audit_log(event_type: str, status: str, context: Dict[str, Any]) -> None:
        """Fallback audit logging: intentionally a no-op."""
@dataclass
class HybridValidationReport(ParityReport):
    """
    ParityReport variant that also records which validator produced it.

    Attributes:
        validator_used: Label of the validator behind this report. Within this
            module it is set to "genai" or "regex"; the default is "unknown".
    """

    validator_used: str = "unknown"

    @property
    def is_valid(self) -> bool:
        """True when the report contains no errors."""
        no_errors = self.error_count == 0
        return no_errors

    @property
    def issues(self) -> List[ParityIssue]:
        """Every issue from all categories, concatenated in category order."""
        combined = self.version_issues + self.count_issues
        combined = combined + self.cross_reference_issues
        combined = combined + self.changelog_issues
        combined = combined + self.security_issues
        return combined

    def get_exit_code(self) -> int:
        """CLI exit code: 0 when the report has no errors, 1 otherwise."""
        if self.error_count == 0:
            return 0
        return 1
class ValidationMode(Enum):
    """Strategies the hybrid validator can be asked to use."""

    # Try GenAI first, then fall back to regex
    AUTO = "auto"
    # GenAI exclusively (fails when no key is available)
    GENAI_ONLY = "genai-only"
    # Regex validation exclusively
    REGEX_ONLY = "regex-only"
class HybridManifestValidator:
    """
    Hybrid manifest validator with GenAI and regex fallback.

    Orchestrates GenAI validation (LLM-powered) with regex validation
    (pattern-based) fallback for environments without API keys.

    Attributes:
        repo_root: Repository root directory (as returned by validate_path)
        mode: Validation mode (AUTO, GENAI_ONLY, REGEX_ONLY)
    """

    def __init__(self, repo_root: Path, mode: ValidationMode = ValidationMode.AUTO):
        """
        Initialize hybrid validator.

        Args:
            repo_root: Repository root directory
            mode: Validation mode

        Raises:
            ValueError: If repo_root invalid
        """
        # Detect if we're in test mode (pytest running); the flag is forwarded
        # to validate_path. Uses the module-level ``sys`` import.
        test_mode = "pytest" in sys.modules
        self.repo_root = validate_path(Path(repo_root), "repo_root", test_mode=test_mode)
        self.mode = mode

    def validate(self) -> HybridValidationReport:
        """
        Validate manifest alignment using the configured mode.

        Returns:
            HybridValidationReport with validation results. In GENAI_ONLY mode,
            when the GenAI validator returns no result (treated here as a
            missing API key), the report carries a single ERROR issue instead
            of an exception being raised.

        Raises:
            Exception: Whatever the underlying regex validator raises, and in
                GENAI_ONLY mode whatever the GenAI validator raises, propagates
                unchanged. In AUTO mode GenAI errors are swallowed and trigger
                the regex fallback.
        """
        if self.mode == ValidationMode.REGEX_ONLY:
            return self._validate_regex()
        if self.mode == ValidationMode.GENAI_ONLY:
            return self._validate_genai_only()
        # AUTO mode: try GenAI, fall back to regex
        return self._validate_auto()

    def _validate_auto(self) -> HybridValidationReport:
        """Validate with GenAI; fall back to regex on no result or GenAI error.

        Only the GenAI call and the conversion of its result are guarded. The
        regex fallback runs outside the ``try`` so that a failure inside the
        regex validator is neither logged as a GenAI error nor retried.
        """
        fallback_context: Dict[str, Any] = {"repo_root": str(self.repo_root)}
        try:
            result = GenAIManifestValidator(self.repo_root).validate()
            report = None if result is None else self._convert_genai_result(result)
        except Exception as e:
            # Deliberate best-effort: any GenAI failure degrades to regex.
            fallback_context["reason"] = "genai_error"
            fallback_context["error"] = str(e)
        else:
            if report is not None:
                # GenAI validation successful
                return report
            # A None result is treated as "no API key"
            fallback_context["reason"] = "no_api_key"

        audit_log("hybrid_validation", "fallback_to_regex", fallback_context)
        return self._validate_regex()

    def _validate_genai_only(self) -> HybridValidationReport:
        """Validate with GenAI only.

        Returns:
            The converted GenAI report, or, when the GenAI validator returns
            None (treated as a missing API key), a report containing one ERROR
            issue explaining how to proceed. No exception is raised for that
            case.
        """
        genai_validator = GenAIManifestValidator(self.repo_root)
        result = genai_validator.validate()
        if result is not None:
            return self._convert_genai_result(result)

        # Return error report instead of raising exception
        report = HybridValidationReport(validator_used="genai")
        report.count_issues.append(
            ParityIssue(
                level=ValidationLevel.ERROR,
                message="GenAI validation requires API key",
                details="Set ANTHROPIC_API_KEY or OPENROUTER_API_KEY, or use --mode=regex-only",
            )
        )
        return report

    def _validate_regex(self) -> HybridValidationReport:
        """Validate with the regex validator only.

        Every entry of the validator's ``mismatches`` mapping becomes an
        ERROR-level issue in ``count_issues``.

        Returns:
            HybridValidationReport with validator_used="regex"
        """
        # Local import of the regex validator entry point.
        from plugins.autonomous_dev.lib.validate_manifest_doc_alignment import (
            validate_alignment,
        )

        # Build paths
        manifest_path = (
            self.repo_root
            / "plugins"
            / "autonomous-dev"
            / "config"
            / "install_manifest.json"
        )
        claude_md_path = self.repo_root / "CLAUDE.md"
        project_md_path = self.repo_root / "PROJECT.md"

        # Call regex validator; documentation files that do not exist are
        # passed as None.
        result_dict = validate_alignment(
            manifest_path=manifest_path,
            claude_md_path=claude_md_path if claude_md_path.exists() else None,
            project_md_path=project_md_path if project_md_path.exists() else None,
        )

        # Convert to HybridValidationReport format
        report = HybridValidationReport(validator_used="regex")
        for key, mismatch in result_dict.get("mismatches", {}).items():
            if "error" in mismatch:
                # Format error
                message = mismatch["error"]
            else:
                # Count mismatch: strip the per-document key prefix to get the
                # component name.
                component = key.replace("claude_md_", "").replace("project_md_", "")
                message = f"{component}: expected {mismatch['expected']}, found {mismatch['actual']}"
            report.count_issues.append(
                ParityIssue(
                    level=ValidationLevel.ERROR,
                    message=message,
                    details=f"File: {mismatch.get('file', 'unknown')}",
                )
            )

        audit_log(
            "hybrid_validation",
            "regex_complete",
            {
                "repo_root": str(self.repo_root),
                "issue_count": len(report.count_issues),
            },
        )
        return report

    def _convert_genai_result(self, result: "GenAIResult") -> HybridValidationReport:
        """
        Convert GenAI result to HybridValidationReport format.

        All converted issues are stored in ``count_issues``.

        Args:
            result: GenAI validation result

        Returns:
            HybridValidationReport with validator_used="genai"
        """
        report = HybridValidationReport(validator_used="genai")
        for issue in result.issues:
            # Map GenAI level to ValidationLevel; anything that is not ERROR
            # or WARNING is reported as INFO.
            if issue.level.value == "ERROR":
                level = ValidationLevel.ERROR
            elif issue.level.value == "WARNING":
                level = ValidationLevel.WARNING
            else:
                level = ValidationLevel.INFO

            # Format message with component and location
            message = f"{issue.component}: {issue.message}"
            details = issue.details
            if issue.location:
                # Guard against details being None so the append cannot raise
                # TypeError; string details keep the exact original format.
                details = (details or "") + f"\nLocation: {issue.location}"

            report.count_issues.append(
                ParityIssue(level=level, message=message, details=details)
            )
        return report
def validate_manifest_alignment(
    repo_root: Path, mode: str = "auto"
) -> HybridValidationReport:
    """
    Convenience function for manifest alignment validation.

    Args:
        repo_root: Repository root directory
        mode: Validation mode ("auto", "genai-only", "regex-only")

    Returns:
        HybridValidationReport with validation results

    Raises:
        ValueError: If mode invalid
    """
    try:
        validation_mode = ValidationMode(mode)
    except ValueError:
        # The enum's own lookup error adds nothing beyond this message, so
        # suppress the implicit exception context.
        raise ValueError(
            f"Invalid mode: {mode}. "
            f"Must be one of: {', '.join(m.value for m in ValidationMode)}"
        ) from None

    validator = HybridManifestValidator(repo_root, mode=validation_mode)
    return validator.validate()
def main():
    """CLI entry point.

    Parses ``--repo-root``, ``--mode`` and ``--json``, runs the validation and
    terminates the process via ``sys.exit``:

    - 0: validation ran and the report has no errors
    - 1: validation ran and the report has at least one error
    - 2: validation (or result rendering) raised an exception

    The failure message for exit code 2 is written to stderr so that stdout
    carries only the report (in particular, only JSON when ``--json`` is set).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Hybrid manifest alignment validator")
    parser.add_argument(
        "--repo-root",
        type=Path,
        default=PROJECT_ROOT,
        help="Repository root directory",
    )
    parser.add_argument(
        "--mode",
        choices=["auto", "genai-only", "regex-only"],
        default="auto",
        help="Validation mode",
    )
    parser.add_argument("--json", action="store_true", help="Output JSON format")
    args = parser.parse_args()

    try:
        result = validate_manifest_alignment(args.repo_root, mode=args.mode)
        if args.json:
            output = {
                "is_valid": result.error_count == 0,
                "error_count": result.error_count,
                "warning_count": result.warning_count,
                "issues": [
                    {"level": issue.level.value, "message": issue.message}
                    for issue in result.count_issues
                ],
            }
            print(json.dumps(output, indent=2))
        else:
            if result.error_count == 0:
                print("✅ Manifest alignment validated successfully")
            else:
                print(f"❌ Found {result.error_count} error(s)")
                for issue in result.count_issues:
                    print(f" {issue}")
        # SystemExit is not an Exception subclass, so it passes through the
        # handler below untouched.
        sys.exit(0 if result.error_count == 0 else 1)
    except Exception as e:
        # Top-level CLI boundary: report the failure on stderr and use a
        # distinct exit code.
        print(f"❌ Validation failed: {e}", file=sys.stderr)
        sys.exit(2)


if __name__ == "__main__":
    main()