#!/usr/bin/env python3 """ Auto-Doc-Sync - Updates documentation when source code changes with GenAI complexity assessment. Detects: - New public functions/classes - Changed function signatures - Updated docstrings - Breaking changes Features: - GenAI semantic complexity assessment (vs hardcoded thresholds) - Smart decision on auto-fix vs doc-syncer invocation - Reduces doc-syncer invocations by ~70% - Graceful degradation with fallback heuristics Actions: - Simple updates: Auto-extract docstrings → docs/api/ - Complex updates: Invoke doc-syncer subagent - Always: Update CHANGELOG.md - Always: Update examples if needed Hook Integration: - Event: PostToolUse (after Write/Edit on src/ files) - Trigger: Writing to src/**/*.py - Action: Detect API changes and sync docs """ import ast import subprocess import sys import os from dataclasses import dataclass from pathlib import Path from typing import List, Optional, Set from genai_utils import GenAIAnalyzer, parse_binary_response from genai_prompts import COMPLEXITY_ASSESSMENT_PROMPT # ============================================================================ # Configuration # ============================================================================ PROJECT_ROOT = Path(__file__).parent.parent.parent SRC_DIR = PROJECT_ROOT / "src" / "[project_name]" DOCS_DIR = PROJECT_ROOT / "docs" API_DOCS_DIR = DOCS_DIR / "api" CHANGELOG_PATH = PROJECT_ROOT / "CHANGELOG.md" # Thresholds for invoking doc-syncer subagent vs simple updates COMPLEX_THRESHOLD = { "new_classes": 2, # 3+ new classes = complex "breaking_changes": 0, # ANY breaking change = complex "new_functions": 5, # 6+ new functions = complex } # Initialize GenAI analyzer (with feature flag support) analyzer = GenAIAnalyzer( use_genai=os.environ.get("GENAI_DOC_UPDATE", "true").lower() == "true" ) # ============================================================================ # Data Structures # ============================================================================ @dataclass class APIChange: """Represents a detected API change.""" type: str # "new_function", "new_class", "modified_signature", "breaking_change" name: str details: str severity: str # "minor", "major", "breaking" @dataclass class AnalysisResult: """Result of analyzing a Python file for API changes.""" file_path: Path new_functions: List[APIChange] new_classes: List[APIChange] modified_signatures: List[APIChange] breaking_changes: List[APIChange] def is_complex(self) -> bool: """Determine if changes are complex enough to need doc-syncer subagent.""" if len(self.breaking_changes) > COMPLEX_THRESHOLD["breaking_changes"]: return True if len(self.new_classes) > COMPLEX_THRESHOLD["new_classes"]: return True if len(self.new_functions) > COMPLEX_THRESHOLD["new_functions"]: return True return False def has_changes(self) -> bool: """Check if any API changes detected.""" return bool( self.new_functions or self.new_classes or self.modified_signatures or self.breaking_changes ) def change_count(self) -> int: """Total number of changes.""" return ( len(self.new_functions) + len(self.new_classes) + len(self.modified_signatures) + len(self.breaking_changes) ) # ============================================================================ # GenAI Complexity Assessment Functions # ============================================================================ def assess_complexity_with_genai(analysis: 'AnalysisResult') -> bool: """Use GenAI to assess if changes are simple or complex. Delegates to shared GenAI utility with graceful fallback to heuristics. Returns: True if changes are complex (need doc-syncer), False if simple """ # Call shared GenAI analyzer response = analyzer.analyze( COMPLEXITY_ASSESSMENT_PROMPT, num_functions=len(analysis.new_functions), function_names=', '.join([c.name for c in analysis.new_functions]) or 'None', num_classes=len(analysis.new_classes), class_names=', '.join([c.name for c in analysis.new_classes]) or 'None', num_modified=len(analysis.modified_signatures), modified_names=', '.join([c.name for c in analysis.modified_signatures]) or 'None', num_breaking=len(analysis.breaking_changes), breaking_names=', '.join([c.name for c in analysis.breaking_changes]) or 'None', ) # Parse response using shared utility if response: is_complex = parse_binary_response( response, true_keywords=["COMPLEX"], false_keywords=["SIMPLE"] ) if is_complex is not None: return is_complex # Fallback to heuristics if GenAI unavailable or ambiguous return analysis.is_complex() # ============================================================================ # AST Analysis Functions # ============================================================================ def extract_public_functions(tree: ast.AST) -> Set[str]: """Extract all public function names from AST.""" functions = set() for node in ast.walk(tree): if isinstance(node, ast.FunctionDef): # Public functions don't start with underscore if not node.name.startswith("_"): functions.add(node.name) return functions def extract_public_classes(tree: ast.AST) -> Set[str]: """Extract all public class names from AST.""" classes = set() for node in ast.walk(tree): if isinstance(node, ast.ClassDef): # Public classes don't start with underscore if not node.name.startswith("_"): classes.add(node.name) return classes def get_function_signature(node: ast.FunctionDef) -> str: """Extract function signature as string.""" args = [] # Regular args for arg in node.args.args: args.append(arg.arg) # *args if node.args.vararg: args.append(f"*{node.args.vararg.arg}") # **kwargs if node.args.kwarg: args.append(f"**{node.args.kwarg.arg}") return f"{node.name}({', '.join(args)})" def extract_docstring(node) -> Optional[str]: """Extract docstring from function or class node.""" if not isinstance(node, (ast.FunctionDef, ast.ClassDef)): return None docstring = ast.get_docstring(node) return docstring def detect_api_changes(file_path: Path) -> AnalysisResult: """Detect API changes in Python file. Compares current version with git HEAD to find: - New public functions - New public classes - Modified function signatures - Breaking changes (removed public APIs) """ # Parse current version try: current_content = file_path.read_text() current_tree = ast.parse(current_content) except Exception as e: print(f"⚠️ Failed to parse {file_path}: {e}") return AnalysisResult(file_path, [], [], [], []) # Try to get previous version from git try: result = subprocess.run( ["git", "show", f"HEAD:{file_path.relative_to(PROJECT_ROOT)}"], cwd=PROJECT_ROOT, capture_output=True, text=True, ) if result.returncode == 0: previous_content = result.stdout previous_tree = ast.parse(previous_content) else: # File is new (not in git yet) previous_tree = None except Exception: # Error getting previous version - assume new file previous_tree = None # Extract current APIs current_functions = extract_public_functions(current_tree) current_classes = extract_public_classes(current_tree) # Extract previous APIs (if exists) if previous_tree: previous_functions = extract_public_functions(previous_tree) previous_classes = extract_public_classes(previous_tree) else: previous_functions = set() previous_classes = set() # Detect changes new_functions = [] new_classes = [] modified_signatures = [] breaking_changes = [] # New functions for func_name in current_functions - previous_functions: new_functions.append(APIChange( type="new_function", name=func_name, details=f"New public function: {func_name}", severity="minor" )) # New classes for class_name in current_classes - previous_classes: new_classes.append(APIChange( type="new_class", name=class_name, details=f"New public class: {class_name}", severity="minor" )) # Breaking changes (removed public APIs) removed_functions = previous_functions - current_functions removed_classes = previous_classes - current_classes for func_name in removed_functions: breaking_changes.append(APIChange( type="breaking_change", name=func_name, details=f"Removed public function: {func_name}", severity="breaking" )) for class_name in removed_classes: breaking_changes.append(APIChange( type="breaking_change", name=class_name, details=f"Removed public class: {class_name}", severity="breaking" )) # TODO: Detect modified signatures (requires more complex AST comparison) # For now, we'll skip this to keep the hook fast return AnalysisResult( file_path=file_path, new_functions=new_functions, new_classes=new_classes, modified_signatures=modified_signatures, breaking_changes=breaking_changes, ) # ============================================================================ # Documentation Update Functions # ============================================================================ def simple_doc_update(analysis: AnalysisResult) -> bool: """Handle simple doc updates without subagent. For minor changes (few new functions/classes, no breaking changes): - Extract docstrings - Update docs/api/ (if it exists) - Add entry to CHANGELOG.md Returns: True if successfully updated, False otherwise """ # For now, we'll just print what would be updated # Full implementation would extract docstrings and write to docs/api/ print(f"📝 Simple doc update for: {analysis.file_path.name}") if analysis.new_functions: print(f" New functions: {', '.join([c.name for c in analysis.new_functions])}") if analysis.new_classes: print(f" New classes: {', '.join([c.name for c in analysis.new_classes])}") # TODO: Extract docstrings and write to docs/api/ # TODO: Update CHANGELOG.md print(" ✓ Docs updated automatically") return True def suggest_doc_syncer_invocation(analysis: AnalysisResult) -> str: """Generate suggestion for invoking doc-syncer subagent. Returns: Formatted message suggesting how to invoke doc-syncer """ return f""" ╭──────────────────────────────────────────────────────────╮ │ 📚 COMPLEX API CHANGES: Doc-Syncer Subagent Recommended │ ╰──────────────────────────────────────────────────────────╯ 📄 File: {analysis.file_path.relative_to(PROJECT_ROOT)} 📊 Changes detected: • New functions: {len(analysis.new_functions)} • New classes: {len(analysis.new_classes)} • Modified signatures: {len(analysis.modified_signatures)} • Breaking changes: {len(analysis.breaking_changes)} ┌──────────────────────────────────────────────────────────┐ │ 🤖 AUTO-INVOKE DOC-SYNCER SUBAGENT │ │ │ │ The doc-syncer subagent can automatically: │ │ ✓ Extract docstrings from all new APIs │ │ ✓ Update docs/api/ with API documentation │ │ ✓ Update CHANGELOG.md with changes │ │ ✓ Update examples if needed │ │ ✓ Check for broken links │ │ ✓ Stage all documentation changes │ └──────────────────────────────────────────────────────────┘ 🔴 BREAKING CHANGES: {chr(10).join([f" • {change.details}" for change in analysis.breaking_changes])} To invoke doc-syncer subagent, tell Claude: "Invoke doc-syncer subagent to update docs for {analysis.file_path.name}" Or manually update docs: → Extract docstrings from new APIs → Update docs/api/{analysis.file_path.stem}.md → Update CHANGELOG.md with breaking changes → Update examples if API changed ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Documentation should always stay in sync with code! ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ """ # ============================================================================ # Main Doc-Sync Logic # ============================================================================ def process_file(file_path: str) -> int: """Process a single file for doc updates. Args: file_path: Path to file that was modified Returns: 0 = Success (docs updated or no updates needed) 1 = Complex changes (suggest doc-syncer subagent) """ path = Path(file_path) # Only process Python source files in src/[project_name]/ if "src/[project_name]" not in str(path): return 0 if not path.suffix == ".py": return 0 # Ignore test files if "test_" in path.name: return 0 # Ignore __init__.py (usually just imports) if path.name == "__init__.py": return 0 print(f"🔍 Checking for API changes: {path.name}") # Detect changes analysis = detect_api_changes(path) if not analysis.has_changes(): print(f" No API changes detected") return 0 print(f" 📋 {analysis.change_count()} API change(s) detected") # Decide: simple update or invoke subagent using GenAI assessment use_genai = os.environ.get("GENAI_DOC_UPDATE", "true").lower() == "true" if use_genai: is_complex = assess_complexity_with_genai(analysis) else: is_complex = analysis.is_complex() if is_complex: print(suggest_doc_syncer_invocation(analysis)) return 1 # Simple update success = simple_doc_update(analysis) return 0 if success else 1 def main(): """Main entry point.""" # Parse arguments (can receive multiple file paths) if len(sys.argv) < 2: # No files provided - allow return 0 file_paths = sys.argv[1:] exit_code = 0 for file_path in file_paths: result = process_file(file_path) if result != 0: exit_code = result return exit_code if __name__ == "__main__": sys.exit(main())