TradingAgents/.claude/lib/validate_manifest_doc_align...

561 lines
16 KiB
Python

#!/usr/bin/env python3
"""
Manifest-Documentation Alignment Validator.
DEPRECATED: This regex-based validator is deprecated as of v3.44.0.
Use hybrid_validator.py instead, which provides GenAI-powered semantic
validation with automatic fallback to regex if no API key is available.
Migration:
# Old (deprecated):
from validate_manifest_doc_alignment import validate_alignment
result = validate_alignment(manifest_path)
# New (recommended):
from hybrid_validator import validate_manifest_alignment
report = validate_manifest_alignment(repo_root)
Removal planned: v3.45.0
---
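Validates that the component counts and versions in CLAUDE.md and PROJECT.md
match install_manifest.json (the single source of truth). health_check.py is
not compared against the manifest because it intentionally lists only a core
subset of components.
This prevents documentation drift by failing loudly when counts mismatch.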
Usage:
python validate_manifest_doc_alignment.py
python validate_manifest_doc_alignment.py --fix # Show fix instructions
python validate_manifest_doc_alignment.py --manifest path/to/manifest.json
Issue #159: Prevent documentation drift after manifest completeness audit
Issue #160: GenAI-powered validation replaces regex-based approach
"""
import argparse
import json
import re
import sys
import warnings
from pathlib import Path
from typing import Dict, Any, Optional, List
# Emit a DeprecationWarning as an import-time side effect of this module.
# stacklevel=2 asks the warnings machinery to attribute the warning to the
# caller's frame instead of to this line.
warnings.warn(
"validate_manifest_doc_alignment is deprecated as of v3.44.0. "
"Use hybrid_validator.validate_manifest_alignment() instead. "
"This module will be removed in v3.45.0.",
DeprecationWarning,
stacklevel=2,
)
class DocumentationDriftError(Exception):
    """Signal that a document's structure prevented count extraction.

    Within this module it is raised by ``extract_claude_md_counts`` when the
    component table cannot be located.
    """
def find_project_root() -> Path:
    """Locate the project root, starting from the current working directory.

    The working directory and then each of its ancestors (nearest first) is
    inspected; the first directory containing either a ``CLAUDE.md`` entry or
    a ``plugins/autonomous-dev`` entry is returned.

    Returns:
        The first matching directory, or the current working directory when
        no candidate contains a marker.
    """
    start = Path.cwd()
    # Each marker is a relative path (as components) whose existence
    # identifies a project root.
    markers = (("CLAUDE.md",), ("plugins", "autonomous-dev"))
    for candidate in (start, *start.parents):
        if any(candidate.joinpath(*marker).exists() for marker in markers):
            return candidate
    return start
def load_manifest_counts(manifest_path: Path) -> Dict[str, Any]:
    """Load component counts from install_manifest.json.

    The manifest may nest its sections under a top-level ``"components"`` key
    or keep them at the top level; both layouts are accepted. Each section is
    expected to be a mapping with a ``"files"`` list.

    Args:
        manifest_path: Path to install_manifest.json.

    Returns:
        Dict with keys ``version`` (``"unknown"`` when absent), ``agents``,
        ``commands``, ``hooks``, ``libs`` and ``skills``.

    Raises:
        FileNotFoundError: If the manifest does not exist.
        json.JSONDecodeError: If the manifest is not valid JSON.
    """
    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest not found: {manifest_path}")

    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's locale encoding.
    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    # Nested "components" structure, or a flat structure with the sections
    # at the top level.
    components = manifest.get("components", manifest)

    def files_of(section: str) -> List[str]:
        """Return the ``files`` list of *section*, or an empty list."""
        return components.get(section, {}).get("files", [])

    # The section key is "lib"; fall back to "libs" when "lib" is missing
    # or empty.
    lib_files = files_of("lib") or files_of("libs")

    # Skills are counted per package directory, not per file: the path
    # component immediately after "skills" names the package.
    skill_files = files_of("skills")
    skill_dirs = set()
    for file_path in skill_files:
        parts = file_path.split("/")
        if "skills" in parts:
            skills_idx = parts.index("skills")
            if skills_idx + 1 < len(parts):
                skill_dirs.add(parts[skills_idx + 1])

    return {
        "version": manifest.get("version", "unknown"),
        "agents": len(files_of("agents")),
        "commands": len(files_of("commands")),
        "hooks": len(files_of("hooks")),
        "libs": len(lib_files),
        # When no skill directory could be derived, fall back to the raw
        # file count.
        "skills": len(skill_dirs) if skill_dirs else len(skill_files),
    }
def extract_claude_md_counts(claude_md_path: Path) -> Dict[str, int]:
    """Extract component counts from the CLAUDE.md component table.

    Looks for table rows such as::

        | Component | Version | Count | Status |
        | Agents    | 1.0.0   | 21    | ✅     |

    Only rows whose first cell is Skills, Commands, Agents or Hooks
    (case-insensitive) are considered. When a component appears in several
    rows, the last row wins.

    Args:
        claude_md_path: Path to CLAUDE.md.

    Returns:
        Dict mapping the lower-cased component name to its count.

    Raises:
        DocumentationDriftError: If no matching table row is found.
    """
    # Read explicitly as UTF-8: the table contains non-ASCII status symbols,
    # so decoding must not depend on the platform's locale encoding.
    content = claude_md_path.read_text(encoding="utf-8")

    # Row shape: | <component> | <dotted version> | <count> | ...
    table_pattern = r'\|\s*(Skills|Commands|Agents|Hooks)\s*\|\s*[\d.]+\s*\|\s*(\d+)\s*\|'
    matches = re.findall(table_pattern, content, re.IGNORECASE)
    if not matches:
        raise DocumentationDriftError(
            f"Component table not found in {claude_md_path}. "
            "Expected format: | Component | Version | Count | Status |"
        )

    return {component.lower(): int(count) for component, count in matches}
def extract_claude_md_version(claude_md_path: Path) -> str:
    """Extract the version from the CLAUDE.md header.

    Looks for the first occurrence of ``**Version**: v3.44.0`` (the ``v``
    prefix is optional).

    Args:
        claude_md_path: Path to CLAUDE.md.

    Returns:
        Version string without the ``v`` prefix, or ``"unknown"`` when no
        version marker is found.
    """
    # Read explicitly as UTF-8 so decoding does not depend on the locale.
    content = claude_md_path.read_text(encoding="utf-8")

    # Capture dot-separated numeric components only, so that a sentence
    # period directly after the version ("v3.44.0.") is not captured.
    version_pattern = r'\*\*Version\*\*:\s*v?(\d+(?:\.\d+)*)'
    match = re.search(version_pattern, content)
    return match.group(1) if match else "unknown"
def extract_project_md_counts(project_md_path: Path) -> Dict[str, int]:
    """Extract component counts from the PROJECT.md component table.

    Looks for table rows such as::

        | Component | Count | Purpose |
        | Agents    | 21    | Specialized AI assistants |

    Only rows whose first cell is Agents, Skills, Commands, Hooks or
    Libraries (case-insensitive) are considered. When a component appears in
    several rows, the last row wins.

    Args:
        project_md_path: Path to PROJECT.md.

    Returns:
        Dict mapping the lower-cased component name to its count, with
        "libraries" reported under the key "libs". Empty when no row
        matches.
    """
    # Read explicitly as UTF-8 so decoding does not depend on the locale.
    content = project_md_path.read_text(encoding="utf-8")

    # Row shape: | <component> | <count> | ...
    table_pattern = r'\|\s*(Agents|Skills|Commands|Hooks|Libraries)\s*\|\s*(\d+)\s*\|'
    counts: Dict[str, int] = {}
    for component, count in re.findall(table_pattern, content, re.IGNORECASE):
        key = component.lower()
        # The manifest counts use "libs", so normalize the document's name.
        if key == "libraries":
            key = "libs"
        counts[key] = int(count)
    return counts
def extract_project_md_version(project_md_path: Path) -> str:
    """Extract the version from the PROJECT.md header.

    Looks for the first occurrence of ``**Version**: v3.44.0`` (the ``v``
    prefix is optional).

    Args:
        project_md_path: Path to PROJECT.md.

    Returns:
        Version string without the ``v`` prefix, or ``"unknown"`` when no
        version marker is found.
    """
    # Read explicitly as UTF-8 so decoding does not depend on the locale.
    content = project_md_path.read_text(encoding="utf-8")

    # Capture dot-separated numeric components only, so that a sentence
    # period directly after the version ("v3.44.0.") is not captured.
    version_pattern = r'\*\*Version\*\*:\s*v?(\d+(?:\.\d+)*)'
    match = re.search(version_pattern, content)
    return match.group(1) if match else "unknown"
def extract_health_check_counts(health_check_path: Path) -> Dict[str, int]:
    """Count the entries of the EXPECTED_* lists in health_check.py.

    The file is scanned textually (it is not imported) for assignments of the
    form ``EXPECTED_AGENTS = [...]``, ``EXPECTED_HOOKS = [...]`` and
    ``EXPECTED_COMMANDS = [...]``. The quoted strings between the brackets
    are counted; both double-quoted and single-quoted items are recognized.

    Args:
        health_check_path: Path to health_check.py.

    Returns:
        Dict with an entry ("agents", "hooks", "commands") for every list
        that was found; lists that are absent are omitted.
    """
    # Read explicitly as UTF-8 so decoding does not depend on the locale.
    content = health_check_path.read_text(encoding="utf-8")

    # One quoted, non-empty string literal in either quote style.
    item_pattern = r'"[^"]+"|\'[^\']+\''

    counts: Dict[str, int] = {}
    for key in ("agents", "hooks", "commands"):
        # Non-greedy up to the first closing bracket; DOTALL lets the list
        # span multiple lines.
        list_match = re.search(
            rf'EXPECTED_{key.upper()}\s*=\s*\[(.*?)\]', content, re.DOTALL
        )
        if list_match:
            counts[key] = len(re.findall(item_pattern, list_match.group(1)))
    return counts
def detect_mismatches(
    expected: Dict[str, Any],
    actual: Dict[str, Any],
) -> Dict[str, Dict[str, Any]]:
    """Compare manifest counts against the counts found in a document.

    The "version" key is skipped here (versions are compared separately), and
    components that the document does not report at all are not treated as
    mismatches.

    Args:
        expected: Counts from the manifest (source of truth).
        actual: Counts extracted from a documentation file.

    Returns:
        Dict keyed by component name; each value holds the "expected" and
        "actual" values for a component whose counts differ.
    """
    return {
        name: {"expected": wanted, "actual": actual[name]}
        for name, wanted in expected.items()
        if name != "version" and name in actual and wanted != actual[name]
    }
def detect_version_mismatch(expected: str, actual: str) -> Dict[str, Dict[str, str]]:
    """Report a version difference between the manifest and a document.

    A side whose version is "unknown" cannot be compared, so no mismatch is
    reported in that case.

    Args:
        expected: Version from the manifest.
        actual: Version from the document.

    Returns:
        ``{"version": {"expected": ..., "actual": ...}}`` when both versions
        are known and differ, otherwise an empty dict.
    """
    if expected == actual or "unknown" in (expected, actual):
        return {}
    return {"version": {"expected": expected, "actual": actual}}
def _record_doc_mismatches(
    result: Dict[str, Any],
    file_name: str,
    key_prefix: str,
    count_mismatches: Dict[str, Dict[str, Any]],
    version_mismatch: Dict[str, Dict[str, str]],
) -> None:
    """Merge one document's mismatches into *result*, tagging each with its file."""
    if not (count_mismatches or version_mismatch):
        return
    result["status"] = "DRIFTED"
    for key, value in count_mismatches.items():
        value["file"] = file_name
        result["mismatches"][f"{key_prefix}_{key}"] = value
    if version_mismatch:
        version_mismatch["version"]["file"] = file_name
        result["mismatches"][f"{key_prefix}_version"] = version_mismatch["version"]


def validate_alignment(
    manifest_path: Path,
    claude_md_path: Optional[Path] = None,
    project_md_path: Optional[Path] = None,
    health_check_path: Optional[Path] = None,
) -> Dict[str, Any]:
    """Validate alignment between the manifest and the documentation files.

    Documents whose path is not given, or does not exist, are skipped.

    Args:
        manifest_path: Path to install_manifest.json.
        claude_md_path: Optional path to CLAUDE.md.
        project_md_path: Optional path to PROJECT.md.
        health_check_path: Optional path to health_check.py. Accepted for
            interface compatibility but not compared (see the note at the
            end of the function body).

    Returns:
        Dict with:
          - "status": "ALIGNED", "DRIFTED" (a count or version differs) or
            "ERROR" (the CLAUDE.md table could not be found).
          - "mismatches": entries keyed "<doc>_<component>"; every entry
            carries a "file" key naming the affected document.
          - "details": the extracted counts/versions per source.

    Raises:
        FileNotFoundError: If the manifest does not exist.
        json.JSONDecodeError: If the manifest is not valid JSON.
    """
    result: Dict[str, Any] = {
        "status": "ALIGNED",
        "mismatches": {},
        "details": {},
    }

    # Load manifest counts (source of truth).
    manifest_counts = load_manifest_counts(manifest_path)
    result["details"]["manifest"] = manifest_counts

    # Validate CLAUDE.md.
    if claude_md_path and claude_md_path.exists():
        try:
            claude_counts = extract_claude_md_counts(claude_md_path)
            claude_version = extract_claude_md_version(claude_md_path)
            _record_doc_mismatches(
                result,
                "CLAUDE.md",
                "claude_md",
                detect_mismatches(manifest_counts, claude_counts),
                detect_version_mismatch(manifest_counts["version"], claude_version),
            )
            result["details"]["claude_md"] = {
                "counts": claude_counts,
                "version": claude_version,
            }
        except DocumentationDriftError as e:
            result["status"] = "ERROR"
            # Include "file" so that consumers grouping mismatches by file
            # attribute the error to CLAUDE.md rather than to "unknown".
            result["mismatches"]["claude_md_format"] = {
                "error": str(e),
                "file": "CLAUDE.md",
            }

    # Validate PROJECT.md.
    if project_md_path and project_md_path.exists():
        project_counts = extract_project_md_counts(project_md_path)
        project_version = extract_project_md_version(project_md_path)
        _record_doc_mismatches(
            result,
            "PROJECT.md",
            "project_md",
            detect_mismatches(manifest_counts, project_counts),
            detect_version_mismatch(manifest_counts["version"], project_version),
        )
        result["details"]["project_md"] = {
            "counts": project_counts,
            "version": project_version,
        }

    # Note: health_check.py validates "core" components (8 agents, 12 hooks,
    # 8 commands), not ALL installed components, so it is not compared to the
    # manifest counts. health_check.py is intentionally a subset for
    # essential pipeline validation.
    return result
def generate_fix_instructions(mismatches: Dict[str, Dict[str, Any]]) -> str:
    """Render mismatches as human-readable, actionable fix instructions.

    Mismatches are grouped by their "file" value ("unknown" when absent) in
    first-seen order. An entry containing an "error" key is shown as an
    error line; any other entry is shown with its expected and actual values.

    Args:
        mismatches: Dict of detected mismatches.

    Returns:
        A success message when there are no mismatches, otherwise a
        newline-joined report.
    """
    if not mismatches:
        return "✅ All documentation is aligned with manifest."

    # Bucket the rendered issue lines per file.
    grouped: Dict[str, List[str]] = {}
    for key, info in mismatches.items():
        bucket = grouped.setdefault(info.get("file", "unknown"), [])
        if "error" in info:
            bucket.append(f" - ERROR: {info['error']}")
            continue
        # The component name is the text after the last underscore of the key.
        component = key.rsplit("_", 1)[-1]
        bucket.append(
            f" - {component}: expected {info['expected']}, found {info['actual']}"
        )

    report = [
        "❌ Documentation drift detected!",
        "",
        "The following files need updates to match install_manifest.json:",
        "",
    ]
    for file_name, issues in grouped.items():
        report += [f"**{file_name}**:", *issues, ""]
    report += [
        "To fix:",
        "1. Update the counts in the affected files to match install_manifest.json",
        "2. Update version numbers to match manifest version",
        "3. Run this validator again to confirm alignment",
    ]
    return "\n".join(report)
def should_block_commit(result: Dict[str, Any]) -> bool:
    """Decide whether a validation result should block a commit.

    Args:
        result: Validation result from validate_alignment(); its "status"
            entry is the only field consulted.

    Returns:
        True when the status is "DRIFTED" or "ERROR", False otherwise.
    """
    status = result["status"]
    return status == "DRIFTED" or status == "ERROR"
def main(args: Optional[List[str]] = None) -> int:
    """Run the validator from the command line.

    Paths not supplied on the command line default to locations under the
    detected project root.

    Args:
        args: Command line arguments (defaults to sys.argv).

    Returns:
        Exit code: 0 when the status is "ALIGNED", 1 for any other status,
        2 when the manifest is missing or is not valid JSON.
    """
    parser = argparse.ArgumentParser(
        description="Validate manifest-documentation alignment"
    )
    path_options = (
        ("--manifest", "Path to install_manifest.json"),
        ("--claude-md", "Path to CLAUDE.md"),
        ("--project-md", "Path to PROJECT.md"),
    )
    for flag, help_text in path_options:
        parser.add_argument(flag, type=Path, help=help_text)
    # NOTE: --fix is accepted, but nothing below consults it; the fix
    # instructions are printed whenever the text output reports a problem.
    switch_options = (
        ("--fix", "Show fix instructions"),
        ("--json", "Output as JSON"),
    )
    for flag, help_text in switch_options:
        parser.add_argument(flag, action="store_true", help=help_text)
    parsed = parser.parse_args(args)

    # Find the project root and fill in default paths.
    root = find_project_root()
    manifest_path = parsed.manifest
    if manifest_path is None:
        manifest_path = (
            root / "plugins" / "autonomous-dev" / "config" / "install_manifest.json"
        )
    claude_md_path = parsed.claude_md if parsed.claude_md is not None else root / "CLAUDE.md"
    project_md_path = parsed.project_md if parsed.project_md is not None else root / "PROJECT.md"

    try:
        result = validate_alignment(
            manifest_path=manifest_path,
            claude_md_path=claude_md_path,
            project_md_path=project_md_path,
        )
        aligned = result["status"] == "ALIGNED"
        if parsed.json:
            print(json.dumps(result, indent=2))
        elif aligned:
            print("✅ Documentation is aligned with install_manifest.json")
        else:
            print(generate_fix_instructions(result["mismatches"]))
        return 0 if aligned else 1
    except FileNotFoundError as e:
        print(f"❌ Error: {e}")
        return 2
    except json.JSONDecodeError as e:
        print(f"❌ Invalid JSON in manifest: {e}")
        return 2
# Script entry point: run the CLI and use its return value as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())