561 lines
16 KiB
Python
561 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Manifest-Documentation Alignment Validator.
|
|
|
|
DEPRECATED: This regex-based validator is deprecated as of v3.44.0.
|
|
Use hybrid_validator.py instead, which provides GenAI-powered semantic
|
|
validation with automatic fallback to regex if no API key is available.
|
|
|
|
Migration:
|
|
# Old (deprecated):
|
|
from validate_manifest_doc_alignment import validate_alignment
|
|
result = validate_alignment(manifest_path)
|
|
|
|
# New (recommended):
|
|
from hybrid_validator import validate_manifest_alignment
|
|
report = validate_manifest_alignment(repo_root)
|
|
|
|
Removal planned: v3.45.0
|
|
|
|
---
|
|
|
|
Validates that the component counts and versions in CLAUDE.md and PROJECT.md
match install_manifest.json (the single source of truth). health_check.py is
not compared against the manifest because it lists only core components.
|
|
|
|
This prevents documentation drift by failing loudly when counts mismatch.
|
|
|
|
Usage:
|
|
python validate_manifest_doc_alignment.py
|
|
python validate_manifest_doc_alignment.py --fix # Show fix instructions
|
|
python validate_manifest_doc_alignment.py --manifest path/to/manifest.json
|
|
|
|
Issue #159: Prevent documentation drift after manifest completeness audit
|
|
Issue #160: GenAI-powered validation replaces regex-based approach
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
import warnings
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional, List
|
|
|
|
# Emit a DeprecationWarning as soon as this module is imported, so existing
# callers are pointed at the replacement before the planned removal.
warnings.warn(
    "validate_manifest_doc_alignment is deprecated as of v3.44.0. "
    "Use hybrid_validator.validate_manifest_alignment() instead. "
    "This module will be removed in v3.45.0.",
    DeprecationWarning,
    stacklevel=2,  # attribute the warning one frame above this call site
)
|
|
|
|
|
|
class DocumentationDriftError(Exception):
    """Signal that a document lacks the structure needed to read its counts.

    Raised when the expected component table cannot be located, which lets
    callers tell a formatting problem apart from a plain count mismatch.
    """
|
|
|
|
|
|
def find_project_root() -> Path:
    """Locate the project root by walking upward from the working directory.

    The first directory (starting with the current one) that contains either
    a ``CLAUDE.md`` entry or a ``plugins/autonomous-dev`` entry wins.

    Returns:
        The matching directory, or the current working directory when no
        directory on the way up qualifies.
    """
    start = Path.cwd()
    for candidate in (start, *start.parents):
        has_claude_md = (candidate / "CLAUDE.md").exists()
        if has_claude_md or (candidate / "plugins" / "autonomous-dev").exists():
            return candidate
    return start
|
|
|
|
|
|
def load_manifest_counts(manifest_path: Path) -> Dict[str, Any]:
    """
    Load component counts from install_manifest.json.

    Both the nested layout (sections under a top-level "components" key) and
    the flat layout (sections at the top level, as used by test fixtures) are
    accepted. Libraries are read from the "lib" section, falling back to
    "libs" when "lib" is missing or empty. Skills are counted as unique
    directories (the path segment right after "skills"); if no such directory
    can be derived, the raw number of skill files is used instead.

    Args:
        manifest_path: Path to install_manifest.json

    Returns:
        Dict with the keys "version", "agents", "commands", "hooks", "libs"
        and "skills". "version" is "unknown" when the manifest has none.

    Raises:
        FileNotFoundError: If manifest doesn't exist
        json.JSONDecodeError: If manifest is invalid JSON
    """
    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest not found: {manifest_path}")

    # JSON is UTF-8 by specification; decode it explicitly instead of relying
    # on the platform's locale encoding.
    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    # Nested "components" structure (actual manifest format) or flat
    # structure (test fixtures).
    components = manifest.get("components", manifest)

    def files_of(section: str) -> List[str]:
        """Return the "files" list of a manifest section, or an empty list."""
        return components.get(section, {}).get("files", [])

    # The real manifest uses "lib"; test fixtures use "libs".
    lib_files = files_of("lib") or files_of("libs")

    # Count skill packages (directories), not individual files:
    # "plugins/.../skills/skill-name/file.md" -> "skill-name".
    skill_files = files_of("skills")
    skill_dirs = set()
    for skill_path in skill_files:
        parts = skill_path.split("/")
        if "skills" in parts:
            skills_idx = parts.index("skills")
            if skills_idx + 1 < len(parts):
                skill_dirs.add(parts[skills_idx + 1])

    return {
        "version": manifest.get("version", "unknown"),
        "agents": len(files_of("agents")),
        "commands": len(files_of("commands")),
        "hooks": len(files_of("hooks")),
        "libs": len(lib_files),
        "skills": len(skill_dirs) if skill_dirs else len(skill_files),
    }
|
|
|
|
|
|
def extract_claude_md_counts(claude_md_path: Path) -> Dict[str, int]:
    """
    Extract component counts from CLAUDE.md table format.

    Looks for table rows like:
        | Component | Version | Count | Status |
        | Agents | 1.0.0 | 21 | ✅ |

    Only rows for Skills, Commands, Agents and Hooks are recognised (matched
    case-insensitively). If a component appears more than once, the last
    matching row wins.

    Args:
        claude_md_path: Path to CLAUDE.md

    Returns:
        Dict mapping the lower-cased component name to its count

    Raises:
        DocumentationDriftError: If no matching table row is found
    """
    # The table contains non-ASCII status markers, so decode as UTF-8
    # explicitly instead of relying on the platform's locale encoding.
    content = claude_md_path.read_text(encoding="utf-8")

    # Pattern: | Agents | 1.0.0 | 21 | ✅ Compliant |
    table_pattern = r'\|\s*(Skills|Commands|Agents|Hooks)\s*\|\s*[\d.]+\s*\|\s*(\d+)\s*\|'
    matches = re.findall(table_pattern, content, re.IGNORECASE)

    if not matches:
        raise DocumentationDriftError(
            f"Component table not found in {claude_md_path}. "
            "Expected format: | Component | Version | Count | Status |"
        )

    return {component.lower(): int(count) for component, count in matches}
|
|
|
|
|
|
def extract_claude_md_version(claude_md_path: Path) -> str:
    """
    Extract version from CLAUDE.md header.

    Looks for: **Version**: v3.44.0

    Args:
        claude_md_path: Path to CLAUDE.md

    Returns:
        Version string (without 'v' prefix), or "unknown" when no version
        marker is found
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale.
    content = claude_md_path.read_text(encoding="utf-8")

    # Dot-separated digit groups only, so that a sentence-ending period after
    # the version ("v3.44.0.") is not captured as part of it.
    version_pattern = r'\*\*Version\*\*:\s*v?(\d+(?:\.\d+)*)'
    match = re.search(version_pattern, content)

    return match.group(1) if match else "unknown"
|
|
|
|
|
|
def extract_project_md_counts(project_md_path: Path) -> Dict[str, int]:
    """
    Extract component counts from PROJECT.md table format.

    Looks for table rows like:
        | Component | Count | Purpose |
        | Agents | 21 | Specialized AI assistants |

    Rows for Agents, Skills, Commands, Hooks and Libraries are recognised
    (matched case-insensitively); "Libraries" is reported under the key
    "libs". If a component appears more than once, the last row wins.

    Args:
        project_md_path: Path to PROJECT.md

    Returns:
        Dict with counts for each component type found (empty if none match)
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale.
    content = project_md_path.read_text(encoding="utf-8")

    # Pattern: | Agents | 21 | Purpose text |
    table_pattern = r'\|\s*(Agents|Skills|Commands|Hooks|Libraries)\s*\|\s*(\d+)\s*\|'
    matches = re.findall(table_pattern, content, re.IGNORECASE)

    counts: Dict[str, int] = {}
    for component, count in matches:
        key = component.lower()
        # Normalize "Libraries" to "libs"
        if key == "libraries":
            key = "libs"
        counts[key] = int(count)

    return counts
|
|
|
|
|
|
def extract_project_md_version(project_md_path: Path) -> str:
    """
    Extract version from PROJECT.md header.

    Looks for: **Version**: v3.44.0

    Args:
        project_md_path: Path to PROJECT.md

    Returns:
        Version string (without 'v' prefix), or "unknown" when no version
        marker is found
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale.
    content = project_md_path.read_text(encoding="utf-8")

    # Dot-separated digit groups only, so that a sentence-ending period after
    # the version ("v3.44.0.") is not captured as part of it.
    version_pattern = r'\*\*Version\*\*:\s*v?(\d+(?:\.\d+)*)'
    match = re.search(version_pattern, content)

    return match.group(1) if match else "unknown"
|
|
|
|
|
|
def extract_health_check_counts(health_check_path: Path) -> Dict[str, int]:
    """
    Extract expected component counts from health_check.py lists.

    Looks for EXPECTED_AGENTS, EXPECTED_HOOKS, EXPECTED_COMMANDS list
    literals and counts the quoted string entries (single- or double-quoted)
    up to the first closing bracket. The file is scanned as text; it is never
    imported or executed.

    Args:
        health_check_path: Path to health_check.py

    Returns:
        Dict with counts under "agents", "hooks" and "commands"; a key is
        omitted when the corresponding list is not found
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale.
    content = health_check_path.read_text(encoding="utf-8")

    # One quoted string literal in either quote style.
    item_pattern = r'"[^"]+"' + "|" + r"'[^']+'"

    list_names = (
        ("agents", "EXPECTED_AGENTS"),
        ("hooks", "EXPECTED_HOOKS"),
        ("commands", "EXPECTED_COMMANDS"),
    )

    counts: Dict[str, int] = {}
    for key, list_name in list_names:
        list_match = re.search(
            re.escape(list_name) + r'\s*=\s*\[(.*?)\]', content, re.DOTALL
        )
        if list_match:
            counts[key] = len(re.findall(item_pattern, list_match.group(1)))

    return counts
|
|
|
|
|
|
def detect_mismatches(
    expected: Dict[str, Any],
    actual: Dict[str, Any],
) -> Dict[str, Dict[str, Any]]:
    """
    Compare manifest counts against the counts read from one document.

    The "version" key is skipped (it is compared separately), and keys that
    the document does not report at all are not treated as mismatches.

    Args:
        expected: Counts from manifest (source of truth)
        actual: Counts from documentation file

    Returns:
        Dict keyed by component name; each value holds the "expected" and
        "actual" figures for a component whose values differ
    """
    return {
        name: {"expected": wanted, "actual": actual[name]}
        for name, wanted in expected.items()
        if name != "version" and name in actual and wanted != actual[name]
    }
|
|
|
|
|
|
def detect_version_mismatch(expected: str, actual: str) -> Dict[str, Dict[str, str]]:
    """
    Report a version difference between the manifest and a document.

    A side whose version is "unknown" cannot be compared, so no mismatch is
    reported in that case.

    Args:
        expected: Version from manifest
        actual: Version from document

    Returns:
        ``{"version": {"expected": ..., "actual": ...}}`` when both versions
        are known and differ, otherwise an empty dict
    """
    either_unknown = expected == "unknown" or actual == "unknown"
    if either_unknown or expected == actual:
        return {}

    return {"version": {"expected": expected, "actual": actual}}
|
|
|
|
|
|
def validate_alignment(
    manifest_path: Path,
    claude_md_path: Optional[Path] = None,
    project_md_path: Optional[Path] = None,
    health_check_path: Optional[Path] = None,
) -> Dict[str, Any]:
    """
    Check CLAUDE.md and PROJECT.md against the install manifest.

    A document is only checked when its path is given and the file exists.
    The reported status starts as "ALIGNED", becomes "DRIFTED" when a count
    or version differs, and becomes "ERROR" when the CLAUDE.md component
    table cannot be found.

    Args:
        manifest_path: Path to install_manifest.json
        claude_md_path: Optional path to CLAUDE.md
        project_md_path: Optional path to PROJECT.md
        health_check_path: Optional path to health_check.py (accepted but not
            used; see the note at the end of the function)

    Returns:
        Dict with "status", "mismatches" (keyed like "claude_md_agents" or
        "project_md_version") and "details" (the raw values that were read)
    """
    # The manifest is the source of truth for every comparison below.
    manifest_counts = load_manifest_counts(manifest_path)
    result: Dict[str, Any] = {
        "status": "ALIGNED",
        "mismatches": {},
        "details": {"manifest": manifest_counts},
    }

    def record(prefix: str, file_label: str, doc_counts: Dict[str, int], doc_version: str) -> None:
        """Compare one document with the manifest and store the findings."""
        count_diffs = detect_mismatches(manifest_counts, doc_counts)
        version_diff = detect_version_mismatch(manifest_counts["version"], doc_version)

        if count_diffs or version_diff:
            result["status"] = "DRIFTED"

        for component, diff in count_diffs.items():
            diff["file"] = file_label
            result["mismatches"][f"{prefix}_{component}"] = diff

        if version_diff:
            version_entry = version_diff["version"]
            version_entry["file"] = file_label
            result["mismatches"][f"{prefix}_version"] = version_entry

        result["details"][prefix] = {
            "counts": doc_counts,
            "version": doc_version,
        }

    # Validate CLAUDE.md
    if claude_md_path and claude_md_path.exists():
        try:
            claude_counts = extract_claude_md_counts(claude_md_path)
            claude_version = extract_claude_md_version(claude_md_path)
            record("claude_md", "CLAUDE.md", claude_counts, claude_version)
        except DocumentationDriftError as e:
            # A missing table is a format problem, not a count mismatch.
            result["status"] = "ERROR"
            result["mismatches"]["claude_md_format"] = {"error": str(e)}

    # Validate PROJECT.md
    if project_md_path and project_md_path.exists():
        project_counts = extract_project_md_counts(project_md_path)
        project_version = extract_project_md_version(project_md_path)
        record("project_md", "PROJECT.md", project_counts, project_version)

    # Note: health_check.py validates "core" components (8 agents, 12 hooks, 8 commands)
    # not ALL installed components. So we don't compare it to manifest counts.
    # health_check.py is intentionally a subset for essential pipeline validation.

    return result
|
|
|
|
|
|
def generate_fix_instructions(mismatches: Dict[str, Dict[str, Any]]) -> str:
    """
    Render detected mismatches as a human-readable fix report.

    Entries are grouped by their "file" value ("unknown" when absent). An
    entry carrying an "error" key is shown as an error line; any other entry
    is shown as "component: expected X, found Y", where the component name is
    the part of the mismatch key after its last underscore.

    Args:
        mismatches: Dict of detected mismatches

    Returns:
        A success message when there are no mismatches, otherwise a
        newline-joined report ending with generic fix steps
    """
    if not mismatches:
        return "✅ All documentation is aligned with manifest."

    # Collect the issue lines per file, keeping first-seen file order.
    grouped: Dict[str, List[str]] = {}
    for mismatch_key, info in mismatches.items():
        bucket = grouped.setdefault(info.get("file", "unknown"), [])
        if "error" in info:
            bucket.append(f" - ERROR: {info['error']}")
            continue
        component = mismatch_key.rsplit("_", 1)[-1]
        bucket.append(
            f" - {component}: expected {info['expected']}, found {info['actual']}"
        )

    report = [
        "❌ Documentation drift detected!",
        "",
        "The following files need updates to match install_manifest.json:",
        "",
    ]
    for file_name, issues in grouped.items():
        report += [f"**{file_name}**:", *issues, ""]

    report += [
        "To fix:",
        "1. Update the counts in the affected files to match install_manifest.json",
        "2. Update version numbers to match manifest version",
        "3. Run this validator again to confirm alignment",
    ]

    return "\n".join(report)
|
|
|
|
|
|
def should_block_commit(result: Dict[str, Any]) -> bool:
    """
    Decide whether a validation result should stop a commit.

    Args:
        result: Validation result from validate_alignment()

    Returns:
        True when the result's status is "DRIFTED" or "ERROR"
    """
    status = result["status"]
    return status == "DRIFTED" or status == "ERROR"
|
|
|
|
|
|
def main(args: Optional[List[str]] = None) -> int:
    """
    CLI entry point.

    Paths that are not given on the command line default to locations under
    the detected project root. The ``--fix`` flag is accepted, but this
    function does not read it: fix instructions are printed whenever the
    status is not "ALIGNED" and ``--json`` is not set.

    Args:
        args: Command line arguments (defaults to sys.argv)

    Returns:
        Exit code: 0 when aligned, 1 for any other status, 2 when the
        manifest is missing or is not valid JSON
    """
    parser = argparse.ArgumentParser(
        description="Validate manifest-documentation alignment"
    )
    path_options = (
        ("--manifest", "Path to install_manifest.json"),
        ("--claude-md", "Path to CLAUDE.md"),
        ("--project-md", "Path to PROJECT.md"),
    )
    for flag, help_text in path_options:
        parser.add_argument(flag, type=Path, help=help_text)
    switch_options = (
        ("--fix", "Show fix instructions"),
        ("--json", "Output as JSON"),
    )
    for flag, help_text in switch_options:
        parser.add_argument(flag, action="store_true", help=help_text)

    parsed = parser.parse_args(args)

    # Fall back to the conventional locations under the project root.
    root = find_project_root()
    default_manifest = (
        root / "plugins" / "autonomous-dev" / "config" / "install_manifest.json"
    )
    manifest_path = parsed.manifest if parsed.manifest else default_manifest
    claude_md_path = parsed.claude_md if parsed.claude_md else root / "CLAUDE.md"
    project_md_path = parsed.project_md if parsed.project_md else root / "PROJECT.md"

    try:
        result = validate_alignment(
            manifest_path=manifest_path,
            claude_md_path=claude_md_path,
            project_md_path=project_md_path,
        )
    except FileNotFoundError as e:
        print(f"❌ Error: {e}")
        return 2
    except json.JSONDecodeError as e:
        print(f"❌ Invalid JSON in manifest: {e}")
        return 2

    aligned = result["status"] == "ALIGNED"
    if parsed.json:
        print(json.dumps(result, indent=2))
    elif aligned:
        print("✅ Documentation is aligned with install_manifest.json")
    else:
        print(generate_fix_instructions(result["mismatches"]))

    return 0 if aligned else 1
|
|
|
|
|
|
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|