TradingAgents/.claude/hooks/auto_update_docs.py

#!/usr/bin/env python3
"""
Auto-Doc-Sync - Updates documentation when source code changes with GenAI complexity assessment.

Detects:
- New public functions/classes
- Changed function signatures (planned; not yet implemented)
- Updated docstrings (planned; not yet implemented)
- Breaking changes (removed public functions/classes)

Features:
- GenAI semantic complexity assessment (vs hardcoded thresholds)
- Smart decision on auto-fix vs doc-syncer invocation
- Reduces doc-syncer invocations by ~70%
- Graceful degradation with fallback heuristics

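Actions:
- Simple updates: Auto-extract docstrings → docs/api/ (not yet implemented; currently only prints a summary)
- Complex updates: Print a recommendation to invoke the doc-syncer subagent
- Always: Update CHANGELOG.md (not yet implemented)
- Always: Update examples if needed (not yet implemented)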

Hook Integration:
- Event: PostToolUse (after Write/Edit on src/ files)
- Trigger: Writing to src/**/*.py
- Action: Detect API changes and sync docs
"""

import ast
import subprocess
import sys
import os
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Set

from genai_utils import GenAIAnalyzer, parse_binary_response
from genai_prompts import COMPLEXITY_ASSESSMENT_PROMPT

# ============================================================================
# Configuration
# ============================================================================

# Project root: the third ancestor directory of this hook file.
PROJECT_ROOT = Path(__file__).parent.parent.parent
# NOTE(review): "[project_name]" looks like an unfilled template placeholder;
# it is used here as a literal directory name - confirm.
SRC_DIR = PROJECT_ROOT.joinpath("src", "[project_name]")
# Documentation tree and the API reference folder inside it.
DOCS_DIR = PROJECT_ROOT.joinpath("docs")
API_DOCS_DIR = DOCS_DIR.joinpath("api")
# Changelog file at the project root.
CHANGELOG_PATH = PROJECT_ROOT.joinpath("CHANGELOG.md")

# Heuristic limits used to choose between a simple update and the doc-syncer
# subagent: a change count strictly greater than its limit means "complex".
COMPLEX_THRESHOLD = dict(
    new_classes=2,  # 3+ new classes = complex
    breaking_changes=0,  # ANY breaking change = complex
    new_functions=5,  # 6+ new functions = complex
)

# Initialize GenAI analyzer (with feature flag support).
# The GENAI_DOC_UPDATE environment variable is read once, at import time.
# It defaults to "true"; only the value "true" (compared case-insensitively)
# results in use_genai=True being passed to the analyzer.
analyzer = GenAIAnalyzer(
    use_genai=os.environ.get("GENAI_DOC_UPDATE", "true").lower() == "true"
)

# ============================================================================
# Data Structures
# ============================================================================


@dataclass
class APIChange:
    """Represents a detected API change.

    Attributes:
        type: Category of the change (see the inline comment for the values).
        name: Name of the affected function or class.
        details: Human-readable description of the change.
        severity: Impact level of the change (see the inline comment).
    """
    # One of "new_function", "new_class", "modified_signature", "breaking_change".
    # This file currently only produces "new_function", "new_class" and
    # "breaking_change".
    type: str  # "new_function", "new_class", "modified_signature", "breaking_change"
    # Identifier of the function or class the change refers to.
    name: str
    # Free-text description, e.g. "New public function: <name>".
    details: str
    # This file currently only produces "minor" (additions) and "breaking" (removals).
    severity: str  # "minor", "major", "breaking"


@dataclass
class AnalysisResult:
    """API changes found in one Python file, grouped by category.

    Attributes:
        file_path: The file that was analyzed.
        new_functions: Public functions that were added.
        new_classes: Public classes that were added.
        modified_signatures: Functions whose signature changed.
        breaking_changes: Public APIs that were removed.
    """
    file_path: Path
    new_functions: List[APIChange]
    new_classes: List[APIChange]
    modified_signatures: List[APIChange]
    breaking_changes: List[APIChange]

    def is_complex(self) -> bool:
        """Return True when any category count exceeds its COMPLEX_THRESHOLD limit.

        Categories are checked in the order breaking changes, new classes,
        new functions; modified signatures do not take part in this heuristic.
        """
        checked_categories = ("breaking_changes", "new_classes", "new_functions")
        return any(
            len(getattr(self, category)) > COMPLEX_THRESHOLD[category]
            for category in checked_categories
        )

    def has_changes(self) -> bool:
        """Return True when at least one category contains a change."""
        return any((
            self.new_functions,
            self.new_classes,
            self.modified_signatures,
            self.breaking_changes,
        ))

    def change_count(self) -> int:
        """Return the number of changes summed over all four categories."""
        categories = (
            self.new_functions,
            self.new_classes,
            self.modified_signatures,
            self.breaking_changes,
        )
        return sum(len(changes) for changes in categories)


# ============================================================================
# GenAI Complexity Assessment Functions
# ============================================================================


def assess_complexity_with_genai(analysis: 'AnalysisResult') -> bool:
    """Ask the shared GenAI analyzer whether the detected changes are complex.

    The counts and comma-separated names of each change category are passed
    to the analyzer together with COMPLEXITY_ASSESSMENT_PROMPT. The reply is
    interpreted with parse_binary_response ("COMPLEX" vs "SIMPLE").

    Args:
        analysis: Detected API changes for one file.

    Returns:
        True if changes are complex (need doc-syncer), False if simple. When
        the analyzer gives no reply or the reply cannot be classified, the
        result of the threshold heuristic ``analysis.is_complex()`` is returned.
    """
    def joined_names(changes) -> str:
        # An empty category is reported to the prompt as the literal 'None'.
        return ', '.join(change.name for change in changes) or 'None'

    reply = analyzer.analyze(
        COMPLEXITY_ASSESSMENT_PROMPT,
        num_functions=len(analysis.new_functions),
        function_names=joined_names(analysis.new_functions),
        num_classes=len(analysis.new_classes),
        class_names=joined_names(analysis.new_classes),
        num_modified=len(analysis.modified_signatures),
        modified_names=joined_names(analysis.modified_signatures),
        num_breaking=len(analysis.breaking_changes),
        breaking_names=joined_names(analysis.breaking_changes),
    )

    verdict = None
    if reply:
        verdict = parse_binary_response(
            reply,
            true_keywords=["COMPLEX"],
            false_keywords=["SIMPLE"]
        )

    if verdict is None:
        # No usable GenAI answer: fall back to the threshold heuristic.
        return analysis.is_complex()
    return verdict


# ============================================================================
# AST Analysis Functions
# ============================================================================


def extract_public_functions(tree: ast.AST) -> Set[str]:
    """Extract all public function names from AST.

    Walks every node of *tree*, so nested functions and methods are included
    alongside module-level functions. Both ``def`` and ``async def``
    definitions are recognised.

    Args:
        tree: Parsed module (or any AST node) to search.

    Returns:
        Names of all function definitions that do not start with an underscore.
    """
    # ast.AsyncFunctionDef is a separate node type from ast.FunctionDef; without
    # it, public coroutines would never be reported as new or removed APIs.
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)

    return {
        node.name
        for node in ast.walk(tree)
        if isinstance(node, function_nodes) and not node.name.startswith("_")
    }


def extract_public_classes(tree: ast.AST) -> Set[str]:
    """Collect the names of every public class defined anywhere in *tree*.

    A class counts as public when its name does not begin with an underscore.
    Nested classes are included because the whole tree is walked.
    """
    return {
        node.name
        for node in ast.walk(tree)
        if isinstance(node, ast.ClassDef) and not node.name.startswith("_")
    }


def get_function_signature(node: ast.FunctionDef) -> str:
    """Extract function signature as string.

    The signature lists parameter names only (no annotations or defaults) in
    declaration order: positional-only parameters followed by ``/``, regular
    parameters, ``*args`` (or a bare ``*`` when keyword-only parameters exist
    without ``*args``), keyword-only parameters, and ``**kwargs``.

    Args:
        node: Function definition node (``ast.AsyncFunctionDef`` nodes expose
            the same attributes and work as well).

    Returns:
        A string such as ``"name(a, /, b, *, c, **kw)"``.
    """
    params = node.args
    parts = []

    # Positional-only parameters (PEP 570); getattr keeps this working on
    # interpreters whose ast.arguments predates the field.
    positional_only = [arg.arg for arg in getattr(params, "posonlyargs", [])]
    if positional_only:
        parts.extend(positional_only)
        parts.append("/")

    # Regular args
    parts.extend(arg.arg for arg in params.args)

    # *args, or the bare "*" separator that introduces keyword-only parameters
    if params.vararg:
        parts.append(f"*{params.vararg.arg}")
    elif params.kwonlyargs:
        parts.append("*")

    # Keyword-only parameters
    parts.extend(arg.arg for arg in params.kwonlyargs)

    # **kwargs
    if params.kwarg:
        parts.append(f"**{params.kwarg.arg}")

    return f"{node.name}({', '.join(parts)})"


def extract_docstring(node) -> Optional[str]:
    """Extract docstring from function or class node.

    Args:
        node: Any object; only ``def``, ``async def`` and ``class`` AST nodes
            are inspected.

    Returns:
        The cleaned docstring as returned by ``ast.get_docstring``, or None
        when *node* is not a function/class node or has no docstring.
    """
    # async def nodes are a distinct AST type and carry docstrings just like
    # regular functions, so they must be accepted explicitly.
    documented_nodes = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    if not isinstance(node, documented_nodes):
        return None

    return ast.get_docstring(node)


def _parse_head_version(file_path: Path) -> Optional[ast.AST]:
    """Return the AST of *file_path* as committed at git HEAD, or None if unavailable."""
    try:
        # Resolve both sides so relative or symlinked paths still map into the
        # project tree instead of raising and being mistaken for a new file.
        relative = file_path.resolve().relative_to(PROJECT_ROOT.resolve())

        result = subprocess.run(
            # "./" makes git interpret the path relative to cwd (PROJECT_ROOT)
            # rather than the repository top level; as_posix() because git
            # expects forward slashes on every platform.
            ["git", "show", f"HEAD:./{relative.as_posix()}"],
            cwd=PROJECT_ROOT,
            capture_output=True,
        )

        if result.returncode != 0:
            # File is new (not in git yet)
            return None

        # Parse raw bytes so the compiler honours coding cookies / BOMs
        # instead of relying on the locale's default text encoding.
        return ast.parse(result.stdout)
    except Exception:
        # Deliberately best-effort: any failure to obtain the previous version
        # (git missing, path outside the project, unparsable old source) is
        # treated as "no previous version".
        return None


def detect_api_changes(file_path: Path) -> AnalysisResult:
    """Detect API changes in Python file.

    Compares current version with git HEAD to find:
    - New public functions
    - New public classes
    - Breaking changes (removed public APIs)

    Modified function signatures are not detected yet; that list is always
    returned empty.

    Args:
        file_path: Path of the Python file to analyze.

    Returns:
        AnalysisResult with the detected changes, each list sorted by name.
        If the current file cannot be read or parsed, a warning is printed and
        an empty result is returned. If no HEAD version can be obtained, every
        public API in the file is reported as new.
    """

    # Parse current version
    try:
        # Bytes input lets ast.parse detect the source encoding itself.
        current_tree = ast.parse(file_path.read_bytes())
    except Exception as e:
        print(f"⚠️  Failed to parse {file_path}: {e}")
        return AnalysisResult(file_path, [], [], [], [])

    # Try to get previous version from git
    previous_tree = _parse_head_version(file_path)

    # Extract current APIs
    current_functions = extract_public_functions(current_tree)
    current_classes = extract_public_classes(current_tree)

    # Extract previous APIs (if exists)
    if previous_tree is not None:
        previous_functions = extract_public_functions(previous_tree)
        previous_classes = extract_public_classes(previous_tree)
    else:
        previous_functions = set()
        previous_classes = set()

    # Set differences have no stable order; sort so reports are deterministic.
    new_functions = [
        APIChange(
            type="new_function",
            name=func_name,
            details=f"New public function: {func_name}",
            severity="minor",
        )
        for func_name in sorted(current_functions - previous_functions)
    ]

    new_classes = [
        APIChange(
            type="new_class",
            name=class_name,
            details=f"New public class: {class_name}",
            severity="minor",
        )
        for class_name in sorted(current_classes - previous_classes)
    ]

    # Breaking changes (removed public APIs): functions first, then classes
    breaking_changes = [
        APIChange(
            type="breaking_change",
            name=func_name,
            details=f"Removed public function: {func_name}",
            severity="breaking",
        )
        for func_name in sorted(previous_functions - current_functions)
    ]
    breaking_changes.extend(
        APIChange(
            type="breaking_change",
            name=class_name,
            details=f"Removed public class: {class_name}",
            severity="breaking",
        )
        for class_name in sorted(previous_classes - current_classes)
    )

    # TODO: Detect modified signatures (requires more complex AST comparison)
    # For now, we'll skip this to keep the hook fast
    modified_signatures = []

    return AnalysisResult(
        file_path=file_path,
        new_functions=new_functions,
        new_classes=new_classes,
        modified_signatures=modified_signatures,
        breaking_changes=breaking_changes,
    )


# ============================================================================
# Documentation Update Functions
# ============================================================================


def simple_doc_update(analysis: AnalysisResult) -> bool:
    """Report a simple documentation update without involving the subagent.

    Intended for minor changes (few new functions/classes, no breaking
    changes). At present this is a placeholder: it prints the file name and
    the names of the new functions and classes, but does not write to
    docs/api/ or CHANGELOG.md.

    Args:
        analysis: Detected API changes for one file.

    Returns:
        True if successfully updated, False otherwise (currently always True).
    """
    print(f"📝 Simple doc update for: {analysis.file_path.name}")

    added_functions = analysis.new_functions
    if added_functions:
        function_names = ', '.join(change.name for change in added_functions)
        print(f"   New functions: {function_names}")

    added_classes = analysis.new_classes
    if added_classes:
        class_names = ', '.join(change.name for change in added_classes)
        print(f"   New classes: {class_names}")

    # TODO: Extract docstrings and write to docs/api/
    # TODO: Update CHANGELOG.md

    print("   ✓ Docs updated automatically")
    return True


def suggest_doc_syncer_invocation(analysis: AnalysisResult) -> str:
    """Generate suggestion for invoking doc-syncer subagent.

    Args:
        analysis: Detected API changes for one file.

    Returns:
        Formatted message suggesting how to invoke doc-syncer. The file is
        shown relative to PROJECT_ROOT when possible, otherwise as given. The
        breaking-changes section lists each removal, or "None" when the
        changes were judged complex for another reason.
    """
    # relative_to() raises ValueError for paths outside (or not expressed
    # relative to) PROJECT_ROOT; a display message must not crash the hook.
    try:
        display_path = analysis.file_path.relative_to(PROJECT_ROOT)
    except ValueError:
        display_path = analysis.file_path

    # Built outside the f-string because backslashes are not allowed inside
    # f-string expressions on older Python versions.
    breaking_lines = "\n".join(
        f"   • {change.details}" for change in analysis.breaking_changes
    ) or "   • None"

    return f"""
╭──────────────────────────────────────────────────────────╮
│ 📚 COMPLEX API CHANGES: Doc-Syncer Subagent Recommended │
╰──────────────────────────────────────────────────────────╯

📄 File: {display_path}

📊 Changes detected:
   • New functions: {len(analysis.new_functions)}
   • New classes: {len(analysis.new_classes)}
   • Modified signatures: {len(analysis.modified_signatures)}
   • Breaking changes: {len(analysis.breaking_changes)}

┌──────────────────────────────────────────────────────────┐
│ 🤖 AUTO-INVOKE DOC-SYNCER SUBAGENT                       │
│                                                           │
│ The doc-syncer subagent can automatically:               │
│ ✓ Extract docstrings from all new APIs                  │
│ ✓ Update docs/api/ with API documentation               │
│ ✓ Update CHANGELOG.md with changes                      │
│ ✓ Update examples if needed                             │
│ ✓ Check for broken links                                │
│ ✓ Stage all documentation changes                       │
└──────────────────────────────────────────────────────────┘

🔴 BREAKING CHANGES:
{breaking_lines}

To invoke doc-syncer subagent, tell Claude:
"Invoke doc-syncer subagent to update docs for {analysis.file_path.name}"

Or manually update docs:
→ Extract docstrings from new APIs
→ Update docs/api/{analysis.file_path.stem}.md
→ Update CHANGELOG.md with breaking changes
→ Update examples if API changed

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Documentation should always stay in sync with code!
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
"""


# ============================================================================
# Main Doc-Sync Logic
# ============================================================================


def process_file(file_path: str) -> int:
    """Process a single file for doc updates.

    Files are skipped (returning 0) unless they are ``.py`` files whose path
    contains ``src/[project_name]``; test modules (``test_*.py``) and
    ``__init__.py`` are skipped as well.

    Args:
        file_path: Path to file that was modified

    Returns:
        0 = Success (docs updated or no updates needed)
        1 = Complex changes (suggest doc-syncer subagent), or the simple
            update reported failure
    """

    path = Path(file_path)

    # Only process Python source files in src/[project_name]/
    # as_posix() normalises separators so the check also matches on Windows.
    if "src/[project_name]" not in path.as_posix():
        return 0

    if path.suffix != ".py":
        return 0

    # Ignore test files. Match the prefix only: a substring check would also
    # skip real modules such as "latest_prices.py" or "contest_rules.py".
    if path.name.startswith("test_"):
        return 0

    # Ignore __init__.py (usually just imports)
    if path.name == "__init__.py":
        return 0

    print(f"🔍 Checking for API changes: {path.name}")

    # Detect changes
    analysis = detect_api_changes(path)

    if not analysis.has_changes():
        print("   No API changes detected")
        return 0

    print(f"   📋 {analysis.change_count()} API change(s) detected")

    # Decide: simple update or invoke subagent using GenAI assessment.
    # The flag is re-read here so the heuristic can be forced per invocation.
    use_genai = os.environ.get("GENAI_DOC_UPDATE", "true").lower() == "true"
    if use_genai:
        is_complex = assess_complexity_with_genai(analysis)
    else:
        is_complex = analysis.is_complex()

    if is_complex:
        print(suggest_doc_syncer_invocation(analysis))
        return 1

    # Simple update
    success = simple_doc_update(analysis)

    return 0 if success else 1


def main():
    """Run the doc-sync check for every file path given on the command line.

    Returns:
        0 when no paths were given or every file returned 0; otherwise the
        most recent non-zero result from process_file.
    """
    targets = sys.argv[1:]

    # Nothing to check - allow
    if not targets:
        return 0

    status = 0
    for outcome in map(process_file, targets):
        # A later failure overrides an earlier one; successes never reset it.
        if outcome != 0:
            status = outcome

    return status


# Run only when executed as a script; main()'s return value becomes the
# process exit status (0 = nothing to do / docs handled, non-zero = attention needed).
if __name__ == "__main__":
    sys.exit(main())