TradingAgents/.claude/hooks/validate_docs_consistency.py

373 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Documentation Consistency Validation Hook - Layer 3 Defense with GenAI Semantic Validation

This pre-commit hook validates that documentation stays in sync with code.
It's OPTIONAL - blocking commits can be annoying, but it catches drift early.

Features:
- Count validation (exact matches)
- GenAI semantic validation of descriptions (accuracy checking)
- Catches misleading or inaccurate documentation
- Graceful degradation with fallback heuristics

Enable via:
    .claude/settings.local.json:
    {
      "hooks": {
        "PreCommit": {
          "*": ["python .claude/hooks/validate_docs_consistency.py"]
        }
      }
    }

Or via git pre-commit hook:
    ln -s ../../.claude/hooks/validate_docs_consistency.py .git/hooks/pre-commit

What it checks:
- README.md skill/agent/command counts match reality
- GenAI validates descriptions match actual functionality
- Cross-document consistency (SYNC-STATUS, UPDATES, marketplace.json)
- No references to non-existent skills
- marketplace.json metrics match actual counts

Exit codes:
- 0: All checks passed
- 1: Documentation inconsistency detected (blocks commit)
"""
import sys
import json
import re
import os
from pathlib import Path
from typing import Tuple
from genai_utils import GenAIAnalyzer, parse_binary_response
from genai_prompts import DESCRIPTION_VALIDATION_PROMPT
# Shared GenAI analyzer instance. GENAI_DOCS_VALIDATE acts as a feature flag:
# GenAI is requested only when the variable is unset or equals "true"
# (compared case-insensitively).
analyzer = GenAIAnalyzer(
    use_genai=os.getenv("GENAI_DOCS_VALIDATE", "true").lower() == "true",
)
def get_plugin_root() -> Path:
    """Locate the plugin root directory relative to this hook file.

    Two layouts are tried, in order:

    1. The hook lives two levels below the repository root (for example
       ``<repo>/.claude/hooks/``): the plugin root is
       ``<repo>/plugins/autonomous-dev`` when that path exists.
    2. The hook lives one level below the plugin root: the hook directory's
       parent is returned when it contains both ``agents`` and ``skills``.

    Returns:
        Path to the plugin root directory.

    Raises:
        FileNotFoundError: If neither layout matches.
    """
    # Resolve to an absolute path before walking upwards. A relative
    # ``__file__`` such as "validate_docs_consistency.py" has parent ".", and
    # Path(".").parent is still ".", so the unresolved walk never leaves the
    # current directory. Resolving also follows a .git/hooks symlink back to
    # the real hook location, which sits at the same depth below the repo.
    hook_dir = Path(__file__).resolve().parent

    # Layout 1: running from <repo>/.claude/hooks (dogfooding).
    repo_root = hook_dir.parent.parent
    plugin_path = repo_root / "plugins" / "autonomous-dev"
    if plugin_path.exists():
        return plugin_path

    # Layout 2: the hook already sits inside the plugin directory.
    current = hook_dir.parent
    if (current / "agents").exists() and (current / "skills").exists():
        return current

    raise FileNotFoundError("Could not find plugin root directory")
def count_skills(plugin_root: Path) -> int:
    """Count the skills found under ``plugin_root / "skills"``.

    A skill is any immediate subdirectory whose name does not start with ".".

    Args:
        plugin_root: Plugin root directory.

    Returns:
        Number of skill directories, or 0 when ``skills`` is not a directory.
    """
    skills_dir = plugin_root / "skills"
    if not skills_dir.is_dir():
        # iterdir() raises on a missing directory, which would abort the whole
        # hook with a traceback instead of a readable count mismatch.
        return 0
    return sum(
        1
        for entry in skills_dir.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    )
def count_agents(plugin_root: Path) -> int:
    """Count the agents found under ``plugin_root / "agents"``.

    An agent is any regular file with the ``.md`` suffix whose name does not
    start with ".".

    Args:
        plugin_root: Plugin root directory.

    Returns:
        Number of agent files, or 0 when ``agents`` is not a directory.
    """
    agents_dir = plugin_root / "agents"
    if not agents_dir.is_dir():
        # iterdir() raises on a missing directory, which would abort the whole
        # hook with a traceback instead of a readable count mismatch.
        return 0
    return sum(
        1
        for entry in agents_dir.iterdir()
        if entry.is_file() and entry.suffix == ".md" and not entry.name.startswith(".")
    )
def count_commands(plugin_root: Path) -> int:
    """Count the commands found under ``plugin_root / "commands"``.

    A command is any regular file with the ``.md`` suffix whose name does not
    start with ".".

    Args:
        plugin_root: Plugin root directory.

    Returns:
        Number of command files, or 0 when ``commands`` is not a directory.
    """
    commands_dir = plugin_root / "commands"
    if not commands_dir.is_dir():
        # iterdir() raises on a missing directory, which would abort the whole
        # hook with a traceback instead of a readable count mismatch.
        return 0
    return sum(
        1
        for entry in commands_dir.iterdir()
        if entry.is_file() and entry.suffix == ".md" and not entry.name.startswith(".")
    )
def check_readme_skill_count(plugin_root: Path, actual_count: int) -> Tuple[bool, str]:
    """Check that README.md mentions the actual number of skills.

    The check passes when the README contains ``<actual_count> skills`` (or
    ``Skills``) with the number starting at a word boundary.

    Args:
        plugin_root: Plugin root directory containing README.md.
        actual_count: Number of skills found on disk.

    Returns:
        ``(passed, message)``; ``passed`` is False when README.md is missing
        or does not mention the expected count.
    """
    readme_path = plugin_root / "README.md"
    if not readme_path.exists():
        return False, "README.md not found"

    # Decode explicitly as UTF-8: the locale default may be unable to decode
    # the emoji and other non-ASCII characters in the README.
    content = readme_path.read_text(encoding="utf-8")
    pattern = rf"\b{actual_count}\s+[Ss]kills"
    if re.search(pattern, content):
        return True, "✅ README.md skill count correct"

    return False, (
        f"README.md shows incorrect skill count (expected {actual_count})\n"
        f"Fix: Update README.md to show '{actual_count} Skills (Comprehensive SDLC Coverage)'"
    )
def check_readme_agent_count(plugin_root: Path, actual_count: int) -> Tuple[bool, str]:
    """Check that README.md mentions the actual number of agents.

    The check passes when the README contains ``<actual_count> specialized
    agents`` or ``<actual_count> agents`` (first letters in either case).

    Args:
        plugin_root: Plugin root directory containing README.md.
        actual_count: Number of agents found on disk.

    Returns:
        ``(passed, message)``; ``passed`` is False when README.md is missing
        or does not mention the expected count.
    """
    readme_path = plugin_root / "README.md"
    if not readme_path.exists():
        # Report a failed check instead of letting read_text() raise and
        # abort the hook with a traceback.
        return False, "README.md not found"

    # Decode explicitly as UTF-8: the locale default may be unable to decode
    # the emoji and other non-ASCII characters in the README.
    content = readme_path.read_text(encoding="utf-8")
    pattern = rf"\b{actual_count}\s+[Ss]pecialized\s+[Aa]gents|\b{actual_count}\s+[Aa]gents"
    if re.search(pattern, content):
        return True, "✅ README.md agent count correct"

    return False, (
        f"README.md shows incorrect agent count (expected {actual_count})\n"
        f"Fix: Update README.md to show '{actual_count} Specialized Agents'"
    )
def check_readme_command_count(plugin_root: Path, actual_count: int) -> Tuple[bool, str]:
    """Check that README.md mentions the actual number of commands.

    The check passes when the README contains ``<actual_count> slash
    commands`` or ``<actual_count> commands`` (first letters in either case).

    Args:
        plugin_root: Plugin root directory containing README.md.
        actual_count: Number of commands found on disk.

    Returns:
        ``(passed, message)``; ``passed`` is False when README.md is missing
        or does not mention the expected count.
    """
    readme_path = plugin_root / "README.md"
    if not readme_path.exists():
        # Report a failed check instead of letting read_text() raise and
        # abort the hook with a traceback.
        return False, "README.md not found"

    # Decode explicitly as UTF-8: the locale default may be unable to decode
    # the emoji and other non-ASCII characters in the README.
    content = readme_path.read_text(encoding="utf-8")
    pattern = rf"\b{actual_count}\s+[Ss]lash\s+[Cc]ommands|\b{actual_count}\s+[Cc]ommands"
    if re.search(pattern, content):
        return True, "✅ README.md command count correct"

    return False, (
        f"README.md shows incorrect command count (expected {actual_count})\n"
        f"Fix: Update README.md to show '{actual_count} Slash Commands'"
    )
def check_marketplace_json(plugin_root: Path, skill_count: int, agent_count: int, command_count: int) -> Tuple[bool, str]:
    """Check that marketplace.json metrics match the actual counts.

    Reads ``.claude-plugin/marketplace.json`` under ``plugin_root`` and
    compares its ``metrics.skills``, ``metrics.agents`` and
    ``metrics.commands`` values with the counts passed in.

    Args:
        plugin_root: Plugin root directory.
        skill_count: Number of skills found on disk.
        agent_count: Number of agents found on disk.
        command_count: Number of commands found on disk.

    Returns:
        ``(passed, message)``. A missing file is treated as a pass (skipped).
        Unparseable JSON, a non-object document, or any mismatching metric is
        a failure.
    """
    marketplace_path = plugin_root / ".claude-plugin" / "marketplace.json"
    if not marketplace_path.exists():
        return True, "⚠️ marketplace.json not found (skipping)"

    # Keep the try body to the parse step so unrelated errors are not
    # misreported as invalid JSON.
    try:
        data = json.loads(marketplace_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError):
        return False, "marketplace.json is invalid JSON"

    if not isinstance(data, dict):
        # Valid JSON such as a list or a bare string has no .get().
        return False, "marketplace.json must contain a JSON object at the top level"

    metrics = data.get("metrics")
    if not isinstance(metrics, dict):
        # A missing, null or non-object metrics section means every metric is
        # absent; each one is then reported below as None.
        metrics = {}

    expected = (
        ("skills", skill_count),
        ("agents", agent_count),
        ("commands", command_count),
    )
    errors = [
        f"{key}: {metrics.get(key)} (should be {wanted})"
        for key, wanted in expected
        if metrics.get(key) != wanted
    ]
    if errors:
        return False, (
            "marketplace.json metrics incorrect:\n"
            + "\n".join(f" - {e}" for e in errors)
            + "\nFix: Update .claude-plugin/marketplace.json metrics section"
        )
    return True, "✅ marketplace.json metrics correct"
def check_no_broken_skill_references(plugin_root: Path) -> Tuple[bool, str]:
    """Check README.md for references to skills that no longer exist.

    Only a fixed list of known-removed skill names is checked. A name counts
    as a broken reference when it has no directory under ``skills/`` and
    appears anywhere in README.md (plain substring match).

    Args:
        plugin_root: Plugin root directory.

    Returns:
        ``(passed, message)``; ``passed`` is False when README.md is missing
        or mentions a removed skill.
    """
    readme_path = plugin_root / "README.md"
    if not readme_path.exists():
        # Report a failed check instead of letting read_text() raise and
        # abort the hook with a traceback.
        return False, "README.md not found"

    # A missing skills/ directory means no skill exists at all.
    skills_dir = plugin_root / "skills"
    actual_skills = set()
    if skills_dir.is_dir():
        actual_skills = {
            d.name for d in skills_dir.iterdir()
            if d.is_dir() and not d.name.startswith(".")
        }

    # Known problematic skills that have been removed.
    problematic_skills = ['engineering-standards']

    # Decode explicitly as UTF-8: the locale default may be unable to decode
    # the emoji and other non-ASCII characters in the README.
    readme_content = readme_path.read_text(encoding="utf-8")
    broken_references = [
        skill for skill in problematic_skills
        if skill not in actual_skills and skill in readme_content
    ]
    if broken_references:
        return False, (
            f"README.md references non-existent skills: {broken_references}\n"
            f"Fix: Remove or replace these skill references"
        )
    return True, "✅ No broken skill references"
def check_cross_document_consistency(plugin_root: Path, skill_count: int) -> Tuple[bool, str]:
    """Check that the documentation files agree on the skill count.

    For each known documentation file that exists, every ``<number> skills``
    mention is collected. A file is inconsistent when it has at least one
    such mention and none of them equals ``skill_count``. Files that are
    missing, or that never state a skill count, are skipped.

    Args:
        plugin_root: Plugin root directory.
        skill_count: Number of skills found on disk.

    Returns:
        ``(passed, message)``; on failure the message lists each offending
        file together with the first count it mentions.
    """
    files_to_check = [
        "README.md",
        "docs/SYNC-STATUS.md",
        "docs/UPDATES.md",
        "INSTALL_TEMPLATE.md",
    ]
    mention_pattern = re.compile(r'(\d+)\s+[Ss]kills')

    inconsistencies = []
    for file_path in files_to_check:
        full_path = plugin_root / file_path
        if not full_path.exists():
            continue
        content = full_path.read_text(encoding="utf-8")
        skill_mentions = mention_pattern.findall(content)
        # Compare the parsed mentions directly. A plain substring test for
        # str(skill_count) is fooled by unrelated digits (version numbers,
        # dates) and would let a stale "N skills" mention slip through.
        if skill_mentions and skill_count not in {int(m) for m in skill_mentions}:
            inconsistencies.append(
                f"{file_path}: shows {skill_mentions[0]} skills (should be {skill_count})"
            )

    if inconsistencies:
        return False, (
            "Cross-document skill count inconsistency:\n"
            + "\n".join(f" - {i}" for i in inconsistencies)
            + f"\nFix: Update all files to show {skill_count} skills"
        )
    return True, "✅ Cross-document consistency verified"
def validate_description_accuracy_with_genai(plugin_root: Path, entity_type: str) -> Tuple[bool, str]:
    """Ask the shared GenAI analyzer whether README descriptions look accurate.

    The README text starting at the first case-insensitive occurrence of
    ``entity_type`` (at most 1000 characters) is sent to the module-level
    ``analyzer`` together with ``DESCRIPTION_VALIDATION_PROMPT``. The reply is
    classified with ``parse_binary_response`` using the keywords "ACCURATE"
    and "MISLEADING".

    Args:
        plugin_root: Root directory of plugin
        entity_type: 'agents', 'skills', or 'commands'

    Returns:
        ``(passed, message)``. Only a reply classified as misleading fails the
        check. A missing README, a missing section, a falsy analyzer response
        or an unclassifiable reply all count as a skipped (passing) check.
    """
    readme_path = plugin_root / "README.md"
    if not readme_path.exists():
        return True, "⏭️ No README.md found"

    # Decode explicitly as UTF-8: the locale default may be unable to decode
    # the emoji and other non-ASCII characters in the README.
    readme_content = readme_path.read_text(encoding="utf-8")

    # Simplified section extraction: start at the first mention of the entity type.
    section_start = readme_content.lower().find(entity_type.lower())
    if section_start == -1:
        return True, f"⏭️ No {entity_type} section found in README.md"

    # Only the first 1000 characters of the section are sent to the analyzer.
    section = readme_content[section_start:section_start + 1000]

    response = analyzer.analyze(
        DESCRIPTION_VALIDATION_PROMPT,
        entity_type=entity_type,
        section=section,
    )

    if response:
        is_accurate = parse_binary_response(
            response,
            true_keywords=["ACCURATE"],
            false_keywords=["MISLEADING"],
        )
        # None means the reply matched neither keyword set; fall through to
        # the skip result below. (Presumably - confirm in genai_utils.)
        if is_accurate is not None:
            if is_accurate:
                return True, f"✅ GenAI validated {entity_type} descriptions are accurate"
            return False, (
                f"⚠️ GenAI found potential inaccuracies in {entity_type} descriptions\n"
                f"Review README.md {entity_type} section for misleading or vague descriptions"
            )

    # Fallback: if GenAI unavailable or ambiguous, skip validation.
    return True, "⏭️ GenAI validation skipped (call failed or ambiguous)"
def main() -> int:
    """Run every documentation consistency check and report the outcome.

    Prints the actual skill/agent/command counts, then one line per passing
    check and a short block per failing check. The two GenAI description
    checks are added only when GENAI_DOCS_VALIDATE is unset or "true".

    Returns:
        0 if all checks pass
        1 if any check fails, or if the plugin root cannot be located
    """
    genai_enabled = os.environ.get("GENAI_DOCS_VALIDATE", "true").lower() == "true"
    if genai_enabled:
        banner_suffix = "🤖 (with GenAI semantic validation)"
    else:
        banner_suffix = ""
    print(f"🔍 Validating documentation consistency... {banner_suffix}")
    print()

    try:
        root = get_plugin_root()
    except FileNotFoundError as exc:
        print(f"❌ Error: {exc}")
        return 1

    # Measure what is actually on disk.
    n_skills = count_skills(root)
    n_agents = count_agents(root)
    n_commands = count_commands(root)

    print("📊 Actual counts:")
    print(f" - Skills: {n_skills}")
    print(f" - Agents: {n_agents}")
    print(f" - Commands: {n_commands}")
    print()

    # Every check runs eagerly, in this order, before anything is reported.
    results = [
        ("README.md skill count", check_readme_skill_count(root, n_skills)),
        ("README.md agent count", check_readme_agent_count(root, n_agents)),
        ("README.md command count", check_readme_command_count(root, n_commands)),
        ("marketplace.json metrics", check_marketplace_json(root, n_skills, n_agents, n_commands)),
        ("Broken skill references", check_no_broken_skill_references(root)),
        ("Cross-document consistency", check_cross_document_consistency(root, n_skills)),
    ]

    # Semantic validation is optional and appended after the count checks.
    if genai_enabled:
        results.append(
            ("Agent descriptions accuracy", validate_description_accuracy_with_genai(root, "agents"))
        )
        results.append(
            ("Command descriptions accuracy", validate_description_accuracy_with_genai(root, "commands"))
        )

    failure_count = 0
    for label, (ok, detail) in results:
        if ok:
            print(detail)
            continue
        print(f"{label} FAILED:")
        print(f" {detail}")
        print()
        failure_count += 1

    print()
    if failure_count == 0:
        print("✅ All documentation consistency checks passed!")
        return 0

    print("❌ Documentation consistency checks FAILED!")
    print()
    print("Fix the issues above before committing.")
    print("Or run: pytest tests/test_documentation_consistency.py -v")
    print()
    print("To skip this hook (NOT RECOMMENDED):")
    print(" git commit --no-verify")
    return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())