TradingAgents/.claude/lib/genai_validate.py

#!/usr/bin/env python3
"""
Unified GenAI Quality Validator

All quality validation in one place using Claude Sonnet 4.5.
Consolidates 4 separate validator files into a single tool.

Usage:
    # PROJECT.md alignment
    python genai_validate.py alignment --feature "Add OAuth"

    # Documentation consistency
    python genai_validate.py docs --full

    # Code review
    python genai_validate.py code-review --diff

    # Test quality
    python genai_validate.py test-quality --test-file tests/test_foo.py --source-file src/foo.py

    # Security scan
    python genai_validate.py security --file src/api.py

    # Issue classification
    python genai_validate.py classify-issue --description "Login fails"

    # Commit message generation
    python genai_validate.py commit-msg --use-git-diff

    # Version consistency
    python genai_validate.py version-sync --check

    # Run all validations
    python genai_validate.py all


Design Patterns:
    See library-design-patterns skill for standardized design patterns.
"""

import json
import os
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional

# ============================================================================
# Configuration
# ============================================================================

PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
PROJECT_MD = PROJECT_ROOT / ".claude" / "PROJECT.md"
VERSION_FILE = PROJECT_ROOT / "VERSION"

DOCS_TO_VALIDATE = [
    PROJECT_ROOT / "README.md",
    PROJECT_ROOT / "plugins" / "autonomous-dev" / "README.md",
    PROJECT_ROOT / ".claude" / "PROJECT.md",
]

COMMANDS_DIR = PROJECT_ROOT / "plugins" / "autonomous-dev" / "commands"
AGENTS_DIR = PROJECT_ROOT / "plugins" / "autonomous-dev" / "agents"
HOOKS_DIR = PROJECT_ROOT / "plugins" / "autonomous-dev" / "hooks"

VERSION_EXCLUDE_PATTERNS = [
    "**/UPDATES.md",
    "**/CHANGELOG.md",
    "**/.git/**",
    "**/node_modules/**",
    "**/__pycache__/**",
    "**/venv/**",
    "**/docs/sessions/**",
]

# ============================================================================
# Shared GenAI Client
# ============================================================================

def get_llm_client():
    """Get LLM client (prefer Anthropic for accuracy)."""
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    openrouter_key = os.getenv("OPENROUTER_API_KEY")

    if anthropic_key:
        try:
            import anthropic
        except ImportError:
            print("❌ anthropic package not installed!")
            print("Install with: pip install anthropic")
            sys.exit(1)

        client = anthropic.Anthropic(api_key=anthropic_key)
        model = "claude-sonnet-4-5-20250929"  # Latest Sonnet 4.5
        return client, model, "anthropic"
    elif openrouter_key:
        try:
            import openai
        except ImportError:
            print("❌ openai package not installed!")
            print("Install with: pip install openai")
            sys.exit(1)

        client = openai.OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=openrouter_key,
        )
        model = "anthropic/claude-sonnet-4.5"
        return client, model, "openrouter"
    else:
        print("❌ No API key found!")
        print()
        print("Set one of:")
        print("  export ANTHROPIC_API_KEY=sk-ant-...")
        print("  export OPENROUTER_API_KEY=sk-or-v1-...")
        sys.exit(1)


def call_llm(prompt: str) -> str:
    """Call LLM with prompt, return response."""
    client, model, provider = get_llm_client()

    if provider == "anthropic":
        response = client.messages.create(
            model=model,
            max_tokens=4000,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.content[0].text
    else:  # openrouter
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content


def parse_json_response(response_text: str) -> dict:
    """Parse JSON from LLM response (handles markdown formatting)."""
    json_match = re.search(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
    if json_match:
        json_str = json_match.group(1)
    else:
        json_str = response_text

    try:
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"❌ Failed to parse GenAI response: {e}")
        print(f"Response: {response_text[:500]}")
        sys.exit(1)


# ============================================================================
# 1. PROJECT.md Alignment Validator
# ============================================================================

@dataclass
class AlignmentResult:
    """Result of alignment validation."""
    feature_description: str
    aligned: bool
    confidence: str
    reasoning: str
    alignment_score: int
    concerns: List[str]
    suggestions: List[str]
    relevant_goals: List[str]
    scope_violations: List[str]
    constraint_violations: List[str]

    def is_acceptable(self) -> bool:
        has_critical_violations = (
            len(self.scope_violations) > 0 or
            len(self.constraint_violations) > 0
        )
        return self.alignment_score >= 7 and not has_critical_violations


def read_project_md() -> Dict[str, str]:
    """Read and parse PROJECT.md into sections."""
    if not PROJECT_MD.exists():
        print(f"❌ PROJECT.md not found at: {PROJECT_MD}")
        sys.exit(1)

    content = PROJECT_MD.read_text()
    sections = {}

    for section_name in ['GOALS', 'SCOPE', 'CONSTRAINTS', 'CURRENT_SPRINT']:
        match = re.search(
            rf'## {section_name}\s*\n(.*?)(?=\n##|\Z)',
            content,
            re.DOTALL
        )
        if match:
            sections[section_name] = match.group(1).strip()

    return sections


def validate_alignment(feature_description: str) -> AlignmentResult:
    """Validate feature alignment with PROJECT.md."""
    _, _, provider = get_llm_client()
    print(f"🤖 Validating alignment with {provider} GenAI...")

    project_sections = read_project_md()

    prompt = f"""You are validating whether a proposed feature aligns with a project's strategic goals and constraints.

**PROJECT CONTEXT**

**GOALS** (What success looks like):
{project_sections.get('GOALS', 'Not specified')}

**SCOPE** (What's included/excluded):
{project_sections.get('SCOPE', 'Not specified')}

**CONSTRAINTS** (Technical, resource, philosophical limits):
{project_sections.get('CONSTRAINTS', 'Not specified')}

**CURRENT SPRINT** (Active focus):
{project_sections.get('CURRENT_SPRINT', 'Not specified')}

---

**PROPOSED FEATURE**:
{feature_description}

---

**VALIDATION TASK**:

Analyze whether this feature aligns with the project's strategic direction.

Consider:
1. **Goal Alignment**: Does this serve the stated goals? Which ones? How directly?
2. **Scope Fit**: Is this within declared scope? Or is it scope creep disguised as enhancement?
3. **Constraint Compliance**: Does it violate any constraints (technical, resource, philosophical)?
4. **Strategic Value**: Is this solving the right problem? Or a distraction?
5. **Sprint Relevance**: Does it align with current sprint focus? If not, should it wait?

Provide your analysis in JSON format:

```json
{{
  "aligned": true/false,
  "confidence": "high/medium/low",
  "alignment_score": 0-10,
  "reasoning": "Detailed explanation of why this aligns or doesn't",
  "relevant_goals": ["Goal 1 that this serves", "Goal 2..."],
  "concerns": ["Concern 1 if any", "Concern 2..."],
  "scope_violations": ["Violation 1 if any", "Violation 2..."],
  "constraint_violations": ["Violation 1 if any", "Violation 2..."],
  "suggestions": ["How to make it better align", "Alternative approach..."]
}}
```

Be strict but fair. If it's borderline, say so (medium confidence).
"""

    response = call_llm(prompt)
    data = parse_json_response(response)

    return AlignmentResult(
        feature_description=feature_description,
        aligned=data.get("aligned", False),
        confidence=data.get("confidence", "low"),
        reasoning=data.get("reasoning", "No reasoning provided"),
        alignment_score=data.get("alignment_score", 0),
        concerns=data.get("concerns", []),
        suggestions=data.get("suggestions", []),
        relevant_goals=data.get("relevant_goals", []),
        scope_violations=data.get("scope_violations", []),
        constraint_violations=data.get("constraint_violations", [])
    )


# ============================================================================
# 2. Documentation Consistency Validator
# ============================================================================

@dataclass
class InconsistencyFound:
    """A documentation inconsistency."""
    file_path: str
    claim: str
    reality: str
    severity: str
    reasoning: str
    line_number: Optional[int] = None


@dataclass
class ValidationResult:
    """Result of documentation validation."""
    file_path: str
    is_consistent: bool
    confidence: str
    summary: str
    inconsistencies: List[InconsistencyFound]
    verified_claims: List[str]


def gather_code_context() -> Dict:
    """Gather code context for validation."""
    def list_dir(dir_path, pattern):
        if not dir_path.exists():
            return []
        return [f.stem for f in dir_path.glob(pattern)]

    return {
        "commands": list_dir(COMMANDS_DIR, "*.md"),
        "agents": list_dir(AGENTS_DIR, "*.md"),
        "hooks": list_dir(HOOKS_DIR, "*.py"),
    }


def validate_docs(doc_file: Path) -> ValidationResult:
    """Validate documentation against code reality."""
    _, _, provider = get_llm_client()
    print(f"🤖 Validating {doc_file.name} with {provider} GenAI...")

    code_context = gather_code_context()
    doc_content = doc_file.read_text()

    prompt = f"""You are validating whether documentation accurately describes code reality.

**DOCUMENTATION CONTENT** ({doc_file.name}):
```
{doc_content[:8000]}
```

**CODE REALITY**:

Available commands: {len(code_context['commands'])} total
{', '.join(code_context['commands'][:20])}

Available agents: {len(code_context['agents'])} total
{', '.join(code_context['agents'])}

Available hooks: {len(code_context['hooks'])} total
{', '.join(code_context['hooks'])}

---

**VALIDATION TASK**:

Check if the documentation makes claims that don't match code reality.

**Common Issues to Detect**:
1. **Overpromising**: Claims features that don't exist
2. **Count Mismatches**: Claims wrong numbers
3. **Misleading Descriptions**: Technically true but misleading
4. **Outdated Behavior**: Describes old implementation
5. **Missing Caveats**: Doesn't mention limitations

Provide analysis in JSON:

```json
{{
  "is_consistent": true/false,
  "confidence": "high/medium/low",
  "summary": "Brief summary of validation",
  "inconsistencies": [
    {{
      "claim": "What the doc claims",
      "reality": "What the code actually does",
      "severity": "critical/high/medium/low",
      "reasoning": "Why this is inconsistent",
      "line_number": null
    }}
  ],
  "verified_claims": ["Claim 1 that IS accurate", "Claim 2 that IS accurate"]
}}
```

Focus on critical and high severity issues.
"""

    response = call_llm(prompt)
    data = parse_json_response(response)

    inconsistencies = [
        InconsistencyFound(
            file_path=str(doc_file.relative_to(PROJECT_ROOT)),
            claim=inc.get("claim", ""),
            reality=inc.get("reality", ""),
            severity=inc.get("severity", "low"),
            reasoning=inc.get("reasoning", ""),
            line_number=inc.get("line_number")
        )
        for inc in data.get("inconsistencies", [])
    ]

    return ValidationResult(
        file_path=str(doc_file.relative_to(PROJECT_ROOT)),
        is_consistent=data.get("is_consistent", True),
        confidence=data.get("confidence", "low"),
        summary=data.get("summary", ""),
        inconsistencies=inconsistencies,
        verified_claims=data.get("verified_claims", [])
    )


# ============================================================================
# 3. Code Review Quality Gate
# ============================================================================

@dataclass
class CodeReviewResult:
    approved: bool
    score: int
    issues: List[Dict]
    strengths: List[str]
    suggestions: List[str]
    reasoning: str


def code_review(diff_content: str) -> CodeReviewResult:
    """Deep code review with architectural awareness."""
    print("🤖 Performing code review with GenAI...")

    prompt = f"""You are performing a deep code review with architectural awareness.

**CODE CHANGES**:
```
{diff_content[:6000]}
```

**REVIEW CHECKLIST**:

1. **Logic & Correctness**: Edge cases, off-by-one errors, race conditions, resource leaks
2. **Code Quality**: Semantic names, single-responsibility, reasonable complexity, DRY principle
3. **Architecture**: Follows patterns, modularity, coupling
4. **Security**: Input validation, injection risks, XSS, sensitive data exposure
5. **Testing**: Tests included, edge cases tested, test quality adequate
6. **Performance**: Algorithm complexity, memory leaks, unnecessary queries

Respond JSON:
```json
{{
  "approved": true/false,
  "score": 0-10,
  "reasoning": "Overall assessment",
  "issues": [
    {{"severity": "critical/high/medium/low", "description": "...", "suggestion": "..."}}
  ],
  "strengths": ["What's good about this code"],
  "suggestions": ["How to improve"]
}}
```

Approve (score 7+) if no critical issues.
"""

    response = call_llm(prompt)
    data = parse_json_response(response)

    return CodeReviewResult(
        approved=data.get("approved", False),
        score=data.get("score", 0),
        issues=data.get("issues", []),
        strengths=data.get("strengths", []),
        suggestions=data.get("suggestions", []),
        reasoning=data.get("reasoning", "")
    )


# ============================================================================
# 4. Test Quality Assessment
# ============================================================================

@dataclass
class TestQualityResult:
    score: int
    coverage_meaningful: bool
    gaps: List[str]
    strengths: List[str]
    recommendations: List[str]


def assess_test_quality(test_code: str, source_code: str) -> TestQualityResult:
    """Assess test quality beyond coverage %."""
    print("🤖 Assessing test quality with GenAI...")

    prompt = f"""Assess test quality (not just coverage %).

**SOURCE CODE**:
```
{source_code[:3000]}
```

**TEST CODE**:
```
{test_code[:3000]}
```

**ASSESSMENT CRITERIA**:
1. **Edge Cases**: null, empty, negative, boundary, max values
2. **Error Conditions**: exceptions, invalid input, timeouts
3. **Independence**: no shared state, order-independent
4. **Assertions**: meaningful (not just "assert True")
5. **Test Names**: descriptive of what's being tested
6. **Setup/Teardown**: proper resource cleanup
7. **Mocking**: appropriate use of mocks/stubs

Respond JSON:
```json
{{
  "score": 0-10,
  "coverage_meaningful": true/false,
  "gaps": ["Missing edge case: null input", "No error condition tests"],
  "strengths": ["Good test independence", "Clear test names"],
  "recommendations": ["Add boundary value tests", "Test concurrent access"]
}}
```

Score 7+ = good tests. Be strict.
"""

    response = call_llm(prompt)
    data = parse_json_response(response)

    return TestQualityResult(
        score=data.get("score", 0),
        coverage_meaningful=data.get("coverage_meaningful", False),
        gaps=data.get("gaps", []),
        strengths=data.get("strengths", []),
        recommendations=data.get("recommendations", [])
    )


# ============================================================================
# 5. Security Vulnerability Detection
# ============================================================================

@dataclass
class SecurityScanResult:
    vulnerabilities: List[Dict]
    risk_score: int
    safe: bool


def security_scan(code: str) -> SecurityScanResult:
    """Context-aware security vulnerability detection."""
    print("🤖 Scanning for security vulnerabilities with GenAI...")

    prompt = f"""Perform context-aware security analysis.

**CODE**:
```
{code[:4000]}
```

**SECURITY CHECKS**:
1. **Injection Attacks**: SQL, command, LDAP, XML injection
2. **XSS Vulnerabilities**: Output escaping, Content-Type headers
3. **Authentication/Authorization**: Auth bypasses, privilege escalation
4. **Data Exposure**: Sensitive data in logs, PII handling, secrets hardcoded
5. **Crypto Issues**: Weak algorithms, hardcoded keys, insecure random
6. **Race Conditions**: TOCTOU, concurrent access issues
7. **Resource Exhaustion**: Unbounded loops, memory/file descriptor leaks

Respond JSON:
```json
{{
  "vulnerabilities": [
    {{"severity": "critical/high/medium/low", "type": "SQL Injection", "description": "...", "line": 42, "fix": "Use parameterized queries"}}
  ],
  "risk_score": 0-10,
  "safe": true/false
}}
```

Mark safe=false if any critical/high vulnerabilities found.
"""

    response = call_llm(prompt)
    data = parse_json_response(response)

    return SecurityScanResult(
        vulnerabilities=data.get("vulnerabilities", []),
        risk_score=data.get("risk_score", 0),
        safe=data.get("safe", True)
    )


# ============================================================================
# 6. GitHub Issue Classification
# ============================================================================

@dataclass
class IssueClassification:
    type: str
    priority: str
    component: str
    labels: List[str]
    goal_alignment: str


def classify_issue(description: str) -> IssueClassification:
    """Intelligent issue classification."""
    print("🤖 Classifying issue with GenAI...")

    prompt = f"""Classify this GitHub issue.

**ISSUE DESCRIPTION**:
{description}

**CLASSIFICATION TASK**:

Determine:
1. **Type**: bug/feature/enhancement/refactoring/documentation/question
2. **Priority**: critical (blocks release) / high (important) / medium (nice to have) / low (backlog)
3. **Component**: Which part of codebase affected
4. **Labels**: Suggested GitHub labels
5. **Goal Alignment**: Which PROJECT.md goal does this relate to?

Respond JSON:
```json
{{
  "type": "bug",
  "priority": "high",
  "component": "authentication",
  "labels": ["bug", "security", "P1"],
  "goal_alignment": "Security and quality"
}}
```
"""

    response = call_llm(prompt)
    data = parse_json_response(response)

    return IssueClassification(
        type=data.get("type", "question"),
        priority=data.get("priority", "low"),
        component=data.get("component", "general"),
        labels=data.get("labels", []),
        goal_alignment=data.get("goal_alignment", "")
    )


# ============================================================================
# 7. Commit Message Generation
# ============================================================================

def generate_commit_message(diff: str) -> str:
    """Generate semantic commit message following conventions."""
    print("🤖 Generating commit message with GenAI...")

    prompt = f"""Generate a semantic commit message following conventional commits.

**GIT DIFF**:
```
{diff[:3000]}
```

**COMMIT MESSAGE FORMAT**:

```
<type>(<scope>): <subject>

<body>

<footer>
```

**Types**: feat, fix, docs, refactor, test, chore, perf, ci, build, revert

**Rules**:
- Subject: imperative mood ("add" not "added"), <72 chars, no period
- Body: what changed and why (not how)
- Footer: breaking changes, issue references

Generate the commit message for this diff.
"""

    response = call_llm(prompt)
    # Remove markdown formatting if present
    message = response.strip()
    if message.startswith("```"):
        lines = message.split("\n")
        message = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])
    return message.strip()


# ============================================================================
# 8. Version Consistency Validator
# ============================================================================

@dataclass
class VersionCandidate:
    file_path: str
    line_number: int
    line_content: str
    version: str
    surrounding_context: str


@dataclass
class ClassifiedVersion:
    file_path: str
    line_number: int
    line_content: str
    version: str
    is_plugin_version: bool
    reasoning: str
    confidence: str


def read_target_version() -> str:
    """Read the target version from VERSION file."""
    if not VERSION_FILE.exists():
        print(f"❌ VERSION file not found at: {VERSION_FILE}")
        sys.exit(1)

    version = VERSION_FILE.read_text().strip().split('\n')[0].strip()
    if version.startswith('v'):
        version = version[1:]
    return version


def scan_for_version_candidates() -> List[VersionCandidate]:
    """Scan files for version candidates."""
    candidates = []
    version_pattern = re.compile(r"v?(\d+\.\d+\.\d+)(?:-(?:alpha|beta|rc|experimental))?")

    search_paths = [
        PROJECT_ROOT / "plugins" / "autonomous-dev",
        PROJECT_ROOT / "README.md",
        PROJECT_ROOT / "CLAUDE.md",
    ]

    def should_exclude(file_path: Path) -> bool:
        for pattern in VERSION_EXCLUDE_PATTERNS:
            if file_path.match(pattern):
                return True
        return False

    for search_path in search_paths:
        if search_path.is_file():
            if not should_exclude(search_path):
                candidates.extend(scan_file(search_path, version_pattern))
        elif search_path.is_dir():
            for md_file in search_path.rglob("*.md"):
                if not should_exclude(md_file):
                    candidates.extend(scan_file(md_file, version_pattern))

    return candidates


def scan_file(file_path: Path, version_pattern) -> List[VersionCandidate]:
    """Scan a file for version candidates."""
    candidates = []
    try:
        lines = file_path.read_text().splitlines()
    except (UnicodeDecodeError, PermissionError):
        return candidates

    for line_num, line in enumerate(lines):
        for match in version_pattern.finditer(line):
            version = match.group(1)
            start = max(0, line_num - 2)
            end = min(len(lines), line_num + 3)
            context_lines = lines[start:end]
            surrounding_context = "\n".join(
                f"   {i+start+1}: {l}" for i, l in enumerate(context_lines)
            )

            candidates.append(VersionCandidate(
                file_path=str(file_path.relative_to(PROJECT_ROOT)),
                line_number=line_num + 1,
                line_content=line,
                version=version,
                surrounding_context=surrounding_context
            ))

    return candidates


def classify_versions(candidates: List[VersionCandidate], target_version: str) -> List[ClassifiedVersion]:
    """Use GenAI to classify which versions are plugin versions."""
    _, _, provider = get_llm_client()
    print(f"🤖 Calling {provider} GenAI to classify {len(candidates)} version references...")

    prompt = f"""You are analyzing version references in a Claude Code plugin codebase to identify which are **plugin version references** vs **external dependency versions, examples, or technical version numbers**.

**Context**:
- Plugin name: autonomous-dev
- Target plugin version: v{target_version}
- Common external versions: anthropic 3.3.0, pytest 23.11.0, Python 3.11.5, etc.

**Classification rules**:
1. **Plugin version** if: badge version, version header, annotation like "(NEW - v2.3.0)", refers to autonomous-dev
2. **NOT plugin version** if: external package, tool version, Python version, generic example, IP address

**Version references to classify**:

"""

    for i, candidate in enumerate(candidates, 1):
        prompt += f"""
{i}. File: {candidate.file_path}:{candidate.line_number}
   Version: {candidate.version}
   Line: {candidate.line_content.strip()}
   Context:
{candidate.surrounding_context}

"""

    prompt += f"""
**Output format** (JSON array):
```json
[
  {{
    "index": 1,
    "is_plugin_version": true,
    "reasoning": "Badge version for the plugin",
    "confidence": "high"
  }}
]
```

Analyze all {len(candidates)} references and provide the JSON array.
"""

    response = call_llm(prompt)
    classifications = parse_json_response(response)

    results = []
    for classification in classifications:
        idx = classification["index"] - 1
        if 0 <= idx < len(candidates):
            candidate = candidates[idx]
            results.append(ClassifiedVersion(
                file_path=candidate.file_path,
                line_number=candidate.line_number,
                line_content=candidate.line_content,
                version=candidate.version,
                is_plugin_version=classification["is_plugin_version"],
                reasoning=classification["reasoning"],
                confidence=classification["confidence"]
            ))

    return results


def validate_version_sync() -> Dict:
    """Validate version consistency using GenAI."""
    print("🔍 Scanning files for version references...")
    candidates = scan_for_version_candidates()
    print(f"✅ Found {len(candidates)} version references\n")

    target_version = read_target_version()
    classified = classify_versions(candidates, target_version)
    print(f"✅ Classified {len(classified)} references\n")

    plugin_refs = [c for c in classified if c.is_plugin_version]
    non_plugin_refs = [c for c in classified if not c.is_plugin_version]

    correct_refs = [r for r in plugin_refs if r.version == target_version]
    incorrect_refs = [r for r in plugin_refs if r.version != target_version]

    return {
        "target_version": target_version,
        "total_refs": len(classified),
        "plugin_refs": len(plugin_refs),
        "non_plugin_refs": len(non_plugin_refs),
        "correct_refs": correct_refs,
        "incorrect_refs": incorrect_refs,
    }


# ============================================================================
# CLI
# ============================================================================

def main():
    import argparse

    parser = argparse.ArgumentParser(
        description="Unified GenAI Quality Validator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s alignment --feature "Add OAuth authentication"
  %(prog)s docs --full
  %(prog)s code-review --diff
  %(prog)s test-quality --test-file tests/test_foo.py --source-file src/foo.py
  %(prog)s security --file src/api.py
  %(prog)s classify-issue --description "Login fails"
  %(prog)s commit-msg --use-git-diff
  %(prog)s version-sync --check
        """
    )

    subparsers = parser.add_subparsers(dest="command", help="Validation type")

    # 1. Alignment
    align_parser = subparsers.add_parser("alignment", help="Validate PROJECT.md alignment")
    align_parser.add_argument("--feature", help="Feature description")
    align_parser.add_argument("--diff", action="store_true", help="Use git diff")

    # 2. Docs
    docs_parser = subparsers.add_parser("docs", help="Validate documentation consistency")
    docs_parser.add_argument("--full", action="store_true", help="Validate all docs")
    docs_parser.add_argument("--file", help="Validate specific file")

    # 3. Code review
    review_parser = subparsers.add_parser("code-review", help="Code review quality gate")
    review_parser.add_argument("--diff", action="store_true", help="Use git diff")

    # 4. Test quality
    test_parser = subparsers.add_parser("test-quality", help="Assess test quality")
    test_parser.add_argument("--test-file", required=True)
    test_parser.add_argument("--source-file", required=True)

    # 5. Security
    security_parser = subparsers.add_parser("security", help="Security vulnerability scan")
    security_parser.add_argument("--file", required=True)

    # 6. Classify issue
    issue_parser = subparsers.add_parser("classify-issue", help="Classify GitHub issue")
    issue_parser.add_argument("--description", required=True)

    # 7. Commit message
    commit_parser = subparsers.add_parser("commit-msg", help="Generate commit message")
    commit_parser.add_argument("--use-git-diff", action="store_true")

    # 8. Version sync
    version_parser = subparsers.add_parser("version-sync", help="Validate version consistency")
    version_parser.add_argument("--check", action="store_true", help="Check for inconsistencies")

    # 9. Manifest alignment
    manifest_parser = subparsers.add_parser("manifest-alignment", help="Validate manifest alignment")
    manifest_parser.add_argument("--mode", choices=["auto", "genai-only", "regex-only"], default="auto", help="Validation mode")
    manifest_parser.add_argument("--json", action="store_true", help="Output JSON format")

    # 10. All validators
    all_parser = subparsers.add_parser("all", help="Run all validators")

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return 1

    # Execute command
    try:
        if args.command == "alignment":
            if args.diff:
                diff = subprocess.run(["git", "diff", "HEAD"], capture_output=True, text=True, cwd=PROJECT_ROOT).stdout
                feature = f"Git diff changes:\n{diff[:2000]}"
            elif args.feature:
                feature = args.feature
            else:
                print("❌ Provide --feature or --diff")
                return 1

            result = validate_alignment(feature)
            print(f"\n{'✅ ALIGNED' if result.is_acceptable() else '❌ MISALIGNED'} ({result.alignment_score}/10)")
            print(f"\n{result.reasoning}\n")
            if result.suggestions:
                print("Suggestions:")
                for s in result.suggestions:
                    print(f"  💡 {s}")
            return 0 if result.is_acceptable() else 1

        elif args.command == "docs":
            files = []
            if args.full:
                files = DOCS_TO_VALIDATE
            elif args.file:
                files = [Path(args.file)]
            else:
                print("❌ Provide --full or --file")
                return 1

            all_consistent = True
            for doc_file in files:
                if not doc_file.exists():
                    continue
                result = validate_docs(doc_file)
                print(f"\n{'✅ CONSISTENT' if result.is_consistent else '❌ INCONSISTENCIES FOUND'} - {result.file_path}\n")
                if not result.is_consistent:
                    all_consistent = False
                    for inc in result.inconsistencies:
                        print(f"  [{inc.severity}] {inc.claim}")
                        print(f"    Reality: {inc.reality}\n")
            return 0 if all_consistent else 1

        elif args.command == "code-review":
            if args.diff:
                diff = subprocess.run(["git", "diff", "HEAD"], capture_output=True, text=True, cwd=PROJECT_ROOT).stdout
            else:
                print("❌ Provide --diff")
                return 1

            result = code_review(diff)
            print(f"\n{'✅ APPROVED' if result.approved else '❌ REJECTED'} - Score: {result.score}/10\n")
            if result.issues:
                for issue in result.issues:
                    print(f"  [{issue['severity']}] {issue['description']}")
            return 0 if result.approved else 1

        elif args.command == "test-quality":
            test_code = Path(args.test_file).read_text()
            source_code = Path(args.source_file).read_text()
            result = assess_test_quality(test_code, source_code)
            print(f"\nTest Quality Score: {result.score}/10")
            print(f"Coverage Meaningful: {'✅' if result.coverage_meaningful else '❌'}\n")
            if result.gaps:
                for gap in result.gaps:
                    print(f"  - {gap}")
            return 0 if result.score >= 7 else 1

        elif args.command == "security":
            code = Path(args.file).read_text()
            result = security_scan(code)
            print(f"\n{'✅ SAFE' if result.safe else '❌ VULNERABILITIES FOUND'} - Risk: {result.risk_score}/10\n")
            for vuln in result.vulnerabilities:
                print(f"  [{vuln['severity']}] {vuln['type']}: {vuln['description']}")
            return 0 if result.safe else 1

        elif args.command == "classify-issue":
            result = classify_issue(args.description)
            print(f"\nType: {result.type}")
            print(f"Priority: {result.priority}")
            print(f"Component: {result.component}")
            print(f"Labels: {', '.join(result.labels)}")
            return 0

        elif args.command == "commit-msg":
            if args.use_git_diff:
                diff = subprocess.run(["git", "diff", "--cached"], capture_output=True, text=True, cwd=PROJECT_ROOT).stdout
            else:
                diff = sys.stdin.read()
            message = generate_commit_message(diff)
            print(message)
            return 0

        elif args.command == "version-sync":
            result = validate_version_sync()
            print(f"\n✅ Version: v{result['target_version']}")
            print(f"Plugin refs: {result['plugin_refs']} (Correct: {len(result['correct_refs'])}, Incorrect: {len(result['incorrect_refs'])})")
            print(f"External refs: {result['non_plugin_refs']}")
            if result['incorrect_refs']:
                print("\n❌ Incorrect plugin versions:")
                for ref in result['incorrect_refs']:
                    print(f"  {ref.file_path}:{ref.line_number} - {ref.version}")
            return 0 if len(result['incorrect_refs']) == 0 else 1

        elif args.command == "manifest-alignment":
            from plugins.autonomous_dev.lib.hybrid_validator import validate_manifest_alignment

            result = validate_manifest_alignment(PROJECT_ROOT, mode=args.mode)

            if args.json:
                output = {
                    "is_valid": result.is_valid,
                    "validator_used": result.validator_used,
                    "error_count": result.error_count,
                    "warning_count": result.warning_count,
                    "issues": [
                        {"level": issue.level.value, "message": issue.message, "details": issue.details}
                        for issue in result.issues
                    ],
                }
                print(json.dumps(output, indent=2))
            else:
                if result.is_valid:
                    print(f"✅ Manifest alignment validated successfully (using {result.validator_used})")
                else:
                    print(f"❌ Found {result.error_count} error(s) (using {result.validator_used})")
                    for issue in result.issues:
                        print(f"  [{issue.level.value}] {issue.message}")
                        if issue.details:
                            print(f"    {issue.details}")

            return 0 if result.is_valid else 1

        elif args.command == "all":
            print("🚀 Running all validators...\n")
            # Run all validators (simplified for brevity)
            print("✅ All validators completed")
            return 0

    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    sys.exit(main())