425 lines
14 KiB
Python
Executable File
425 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
File Organization Enforcer - Keeps project structure clean (GenAI-Enhanced)

This script enforces the standard project structure using intelligent GenAI
analysis instead of rigid pattern matching.

What it does:
- Analyzes file content and context to suggest optimal location
- Reads PROJECT.md for project-specific conventions
- Understands edge cases (setup.py is config, not source code)
- Explains reasoning for each suggestion
- Gracefully falls back to heuristics if GenAI unavailable

Benefits vs rules-based:
- Context-aware: Understands file purpose, not just extension
- Forgiving: Respects project conventions and common patterns
- Educational: Explains why each file belongs where it does
- Adaptable: Learns from PROJECT.md standards

Can run in two modes:
1. Validation mode (default): Reports violations with reasoning
2. Fix mode (--fix): Automatically fixes violations

Usage:
    # Check for violations (with GenAI analysis)
    python hooks/enforce_file_organization.py

    # Auto-fix violations
    python hooks/enforce_file_organization.py --fix

    # Disable GenAI (use heuristics only)
    GENAI_FILE_ORGANIZATION=false python hooks/enforce_file_organization.py

Exit codes:
- 0: Structure correct or successfully fixed
- 1: Violations found (validation mode)
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import shutil
|
|
from pathlib import Path
|
|
from typing import List, Tuple, Dict, Optional
|
|
try:
|
|
from genai_utils import GenAIAnalyzer, should_use_genai
|
|
from genai_prompts import FILE_ORGANIZATION_PROMPT
|
|
except ImportError:
|
|
# When run from different directory, try absolute import
|
|
from hooks.genai_utils import GenAIAnalyzer, should_use_genai
|
|
from hooks.genai_prompts import FILE_ORGANIZATION_PROMPT
|
|
|
|
|
|
def load_structure_template() -> Dict:
    """Load the standard project structure template.

    The template is expected at ``templates/project-structure.json`` under the
    parent of the directory that contains this file. When that file does not
    exist, the built-in layout from ``get_default_structure()`` is used.

    Returns:
        The parsed JSON document, or the default structure mapping.

    Raises:
        json.JSONDecodeError: If the template exists but is not valid JSON.
    """
    template_path = Path(__file__).parent.parent / "templates" / "project-structure.json"

    if not template_path.exists():
        return get_default_structure()

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    return json.loads(template_path.read_text(encoding="utf-8"))
|
|
|
|
|
|
def get_default_structure() -> Dict:
    """Return the built-in layout used when no template file is available."""
    # (directory name, required?) pairs; dict insertion order follows this tuple.
    defaults = (
        ("src/", True),
        ("tests/", True),
        ("docs/", True),
        ("scripts/", False),
        (".claude/", True),
    )
    layout = {name: {"required": needed} for name, needed in defaults}
    return {"structure": layout}
|
|
|
|
|
|
def get_project_root() -> Path:
    """Find the project root directory.

    Starting at the current working directory and walking upward, return the
    first directory that contains a ``.git`` entry or a ``PROJECT.md`` entry.

    Returns:
        The nearest matching directory, or the current working directory when
        no directory on the way up carries either marker.
    """
    start = Path.cwd()

    # ``start.parents`` ends with the filesystem root, so the topmost
    # directory is inspected too (a ``current != current.parent`` loop would
    # stop before checking it).
    for candidate in (start, *start.parents):
        if (candidate / ".git").exists() or (candidate / "PROJECT.md").exists():
            return candidate

    return start
|
|
|
|
|
|
def check_required_directories(project_root: Path, structure: Dict) -> List[str]:
    """Report required directories that are absent from the project.

    Only entries of ``structure["structure"]`` whose key ends with ``/`` and
    whose config has a truthy ``"required"`` value are considered.

    Args:
        project_root: Directory the entries are resolved against.
        structure: Template mapping holding a ``"structure"`` dict.

    Returns:
        The entry names (as written in the template, trailing slash included)
        for which no path exists under ``project_root``, in template order.
    """
    entries = structure.get("structure", {})
    return [
        name
        for name, settings in entries.items()
        if name.endswith("/")
        and settings.get("required", False)
        and not (project_root / name.rstrip("/")).exists()
    ]
|
|
|
|
|
|
def _extract_markdown_section(doc_path: Path, headings: Tuple[str, ...]) -> Optional[str]:
    """Return the body of the first ``##`` section in *doc_path* titled with one of *headings*.

    The body is stripped and capped at 400 characters. ``None`` is returned
    when the path is not a regular file, cannot be read, or has no matching
    section.
    """
    import re  # kept local: the module-level import block does not provide it

    # ``is_file`` (rather than ``exists``) keeps a directory with the same
    # name from reaching ``read_text`` and raising.
    if not doc_path.is_file():
        return None

    try:
        # The section only feeds a prompt, so undecodable bytes are replaced
        # instead of aborting the whole run.
        text = doc_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None

    match = re.search(
        r'##\s*(' + "|".join(headings) + r')\s*\n(.*?)(?=\n##\s|\Z)',
        text,
        re.DOTALL | re.IGNORECASE,
    )
    if match is None:
        return None
    return match.group(2).strip()[:400]


def read_project_context(project_root: Path) -> str:
    """Read CLAUDE.md and PROJECT.md for project-specific organization standards.

    For each document, the first ``##`` section whose title matches one of the
    expected headings (case-insensitive) is extracted, trimmed to 400
    characters, and prefixed with a label naming its origin.

    Args:
        project_root: Directory expected to contain the two documents.

    Returns:
        The labelled sections joined by blank lines (CLAUDE.md first), or a
        generic description of the standard layout when neither document
        yields a section.
    """
    sources = (
        (
            project_root / "CLAUDE.md",
            ("Root Directory", "Root Files", "File Organization"),
            "Project Standards (from CLAUDE.md):",
        ),
        (
            project_root / "PROJECT.md",
            ("File Organization", "Directory Structure", "Project Structure"),
            "File Organization (from PROJECT.md):",
        ),
    )

    context_parts = []
    for doc_path, headings, label in sources:
        section = _extract_markdown_section(doc_path, headings)
        if section is not None:
            context_parts.extend((label, section))

    if context_parts:
        return "\n\n".join(context_parts)

    return "Standard project structure (src/, tests/, docs/, scripts/)"
|
|
|
|
|
|
def analyze_file_with_genai(
    file_path: Path,
    project_root: Path,
    analyzer: Optional[GenAIAnalyzer] = None
) -> Tuple[str, str]:
    """Use GenAI to analyze a file and suggest where it belongs.

    The analyzer is given the file name, its extension, a preview of the
    first 20 lines, and the project context from ``read_project_context``.
    Its reply is expected in the form ``"LOCATION | reason"``.

    The heuristic rules in ``heuristic_file_location`` are used instead when
    no analyzer is supplied, when the analyzer returns nothing, or when the
    reply has no ``|`` separator or an empty location.

    Args:
        file_path: File to classify.
        project_root: Project root, used to collect project conventions.
        analyzer: GenAI analyzer; ``None`` selects heuristics only.

    Returns:
        (suggested_location, reason) tuple
    """
    if not analyzer:
        return heuristic_file_location(file_path)

    # Preview the first 20 lines of the file.
    try:
        preview_lines = file_path.read_text(encoding="utf-8").split('\n')[:20]
        content_preview = '\n'.join(preview_lines)
    except (OSError, UnicodeError):
        # Only read/decoding failures are expected here; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        content_preview = "(binary file or read error)"

    project_context = read_project_context(project_root)

    response = analyzer.analyze(
        FILE_ORGANIZATION_PROMPT,
        filename=file_path.name,
        extension=file_path.suffix,
        content_preview=content_preview,
        project_context=project_context
    )

    if not response:
        return heuristic_file_location(file_path)

    # Split on the first "|" only, so the reason may itself contain "|".
    location, separator, reason = response.partition('|')
    location = location.strip()

    # An empty location cannot be used as a target directory by callers.
    if not separator or not location:
        return heuristic_file_location(file_path)

    return (location, reason.strip())
|
|
|
|
|
|
# Files that conventionally live in the project root. Built once at import
# time instead of on every call.
_COMMON_ROOT_FILES = frozenset({
    # Essential docs
    "README.md", "CHANGELOG.md", "LICENSE", "LICENSE.md",
    # Community docs
    "CODE_OF_CONDUCT.md", "CONTRIBUTING.md", "SECURITY.md",
    # Project standards
    "CLAUDE.md", "PROJECT.md",
    # Build/config
    "setup.py", "conftest.py", "pyproject.toml", "package.json",
    "tsconfig.json", "Makefile", "Dockerfile", ".gitignore",
    ".dockerignore", "requirements.txt", "package-lock.json",
    "poetry.lock", "Cargo.toml", "go.mod",
})

# Name prefixes that mark a throwaway file.
_SCRATCH_PREFIXES = ("temp", "scratch")


def _is_scratch_name(filename: str) -> bool:
    """Return True when *filename* looks like a temporary or scratch file."""
    if filename in ("test.py", "debug.py"):
        return True

    stem = filename.split(".", 1)[0]
    for prefix in _SCRATCH_PREFIXES:
        if stem.startswith(prefix):
            remainder = stem[len(prefix):]
            # "temp", "temp1" and "temp_notes" qualify; names where the prefix
            # merely begins a longer word ("template", "temperature") do not.
            # These files are suggested for deletion, so stay conservative.
            if not remainder or not remainder[0].isalpha():
                return True
    return False


def heuristic_file_location(file_path: Path) -> Tuple[str, str]:
    """Fallback heuristic rules for file organization (used if GenAI unavailable).

    Rules are checked in this order; the first match wins:
    well-known root files, test files, temporary/scratch files, markdown,
    shell scripts, source code. Anything else stays in the root.

    Args:
        file_path: File to classify; only its name and suffix are inspected.

    Returns:
        (suggested_location, reason) tuple. The location is ``"root"``,
        ``"DELETE"``, or a directory such as ``"docs/"``.
    """
    filename = file_path.name
    suffix = file_path.suffix

    # Allowed files in root
    if filename in _COMMON_ROOT_FILES:
        return ("root", "allowed root file per project standards")

    # Test files ("_test." also covers names ending in "_test.py")
    if filename.startswith("test_") or "_test." in filename:
        return ("tests/unit/", "test file (heuristic)")

    # Temporary/scratch files
    if _is_scratch_name(filename):
        return ("DELETE", "temporary or scratch file (heuristic)")

    # Documentation (not in allowed root list)
    if suffix == ".md":
        return ("docs/", "markdown documentation (heuristic)")

    # Scripts (shell scripts)
    if suffix in (".sh", ".bash"):
        return ("scripts/", "shell script (heuristic)")

    # Source code files
    if suffix in (".py", ".js", ".ts", ".go", ".rs", ".java"):
        return ("src/", "source code file (heuristic)")

    # Unknown - leave in root
    return ("root", "unknown file type - manual review needed")
|
|
|
|
|
|
def find_misplaced_files(project_root: Path, use_genai: bool = True, verbose: bool = False) -> List[Tuple[Path, str, str]]:
    """Find files in root that should be in subdirectories.

    Only regular, non-hidden files directly inside ``project_root`` are
    examined. Each is classified by ``analyze_file_with_genai`` and reported
    unless the suggested location is ``"root"``.

    Args:
        project_root: Project root directory
        use_genai: Whether to use GenAI analysis (default: True)
        verbose: Show debug output about GenAI status (also enabled by the
            ``DEBUG_GENAI`` environment variable)

    Returns:
        List of (file_path, suggested_location, reason) tuples, ordered by
        file path.
    """
    debug = bool(verbose or os.environ.get("DEBUG_GENAI"))
    genai_enabled = use_genai and should_use_genai("GENAI_FILE_ORGANIZATION")

    if debug:
        print("\n🔧 GenAI File Organization Status:", file=sys.stderr)
        print(f" SDK Requested: {use_genai}", file=sys.stderr)
        print(f" Feature Flag: {should_use_genai('GENAI_FILE_ORGANIZATION')}", file=sys.stderr)
        print(f" Final Status: {'ENABLED' if genai_enabled else 'DISABLED (using heuristics)'}", file=sys.stderr)

    # Initialize GenAI analyzer if enabled
    analyzer = None
    if genai_enabled:
        analyzer = GenAIAnalyzer(max_tokens=50)  # Short responses

        # NOTE(review): the SDK probe (and the resulting downgrade to
        # heuristics) only runs in debug mode, as before - confirm whether
        # GenAIAnalyzer copes with a missing SDK on its own.
        if debug:
            try:
                from anthropic import Anthropic  # noqa: F401 - availability probe only
                print(" Anthropic SDK: AVAILABLE", file=sys.stderr)
            except ImportError:
                print(" Anthropic SDK: NOT INSTALLED (will use heuristics)", file=sys.stderr)
                analyzer = None

    misplaced = []

    # iterdir() yields entries in arbitrary order; sort so that reports and
    # fixes are reproducible from run to run.
    for entry in sorted(project_root.iterdir()):
        # Skip directories and hidden files
        if not entry.is_file() or entry.name.startswith('.'):
            continue

        # Analyze file with GenAI or heuristics
        suggested_location, reason = analyze_file_with_genai(entry, project_root, analyzer)

        # Files that belong in the root are not violations
        if suggested_location == "root":
            continue

        misplaced.append((entry, suggested_location, reason))

    return misplaced
|
|
|
|
|
|
def create_directory_structure(project_root: Path, structure: Dict) -> None:
    """Create every required directory from the template, plus its subdirectories.

    Entries of ``structure["structure"]`` are handled only when their key ends
    with ``/`` and their config has a truthy ``"required"`` value. For those,
    each key of the optional ``"subdirectories"`` mapping is created beneath
    the directory as well. Existing directories are left as they are.

    Args:
        project_root: Directory under which the layout is created.
        structure: Template mapping holding a ``"structure"`` dict.
    """
    for name, settings in structure.get("structure", {}).items():
        wanted = name.endswith("/") and settings.get("required", False)
        if not wanted:
            continue

        base = project_root / name.rstrip("/")
        base.mkdir(parents=True, exist_ok=True)

        # Nested directories declared for this entry, if any
        for child in settings.get("subdirectories", {}).keys():
            (base / child.rstrip("/")).mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
def fix_file_organization(project_root: Path, misplaced: List[Tuple[Path, str, str]]) -> None:
    """Move misplaced files to their suggested locations (or delete them).

    An entry whose target is ``"DELETE"`` is removed. Any other target is
    treated as a directory relative to ``project_root``; it is created if
    needed and the file is moved into it. A move is skipped, with a message,
    when the destination would fall outside the project or when a file of the
    same name already exists there.

    Args:
        project_root: Project root directory.
        misplaced: (file_path, target_dir, reason) tuples to act on.
    """
    root_resolved = project_root.resolve()

    for file_path, target_dir, reason in misplaced:
        if target_dir == "DELETE":
            print(f" 🗑️ Deleting: {file_path.name} ({reason})")
            file_path.unlink()
            continue

        target_path = project_root / target_dir / file_path.name

        # target_dir may originate from a model response, so an absolute path
        # or ".." segments must not move files out of the project.
        try:
            target_path.resolve().relative_to(root_resolved)
        except ValueError:
            print(f" ⚠️ Skipping: {file_path.name} (target {target_dir} is outside the project)")
            continue

        # Moving onto an existing file can silently replace it; keep both
        # files and leave the decision to the user.
        if target_path.exists():
            print(f" ⚠️ Skipping: {file_path.name} ({target_path} already exists)")
            continue

        target_path.parent.mkdir(parents=True, exist_ok=True)

        print(f" 📁 Moving: {file_path.name} → {target_dir}")
        print(f" Reason: {reason}")
        shutil.move(str(file_path), str(target_path))
|
|
|
|
|
|
def validate_structure(project_root: Path, fix: bool = False) -> Tuple[bool, str]:
    """Validate the project layout against the structure template.

    Missing required directories and misplaced root files are collected and
    described in a report. With ``fix`` set, the directories are created and
    the files are moved or deleted before returning.

    Args:
        project_root: Project root directory
        fix: If True, automatically fix violations

    Returns:
        (is_valid, message); ``is_valid`` is True when nothing was wrong or
        when fix mode ran, False when violations were only reported.
    """
    structure = load_structure_template()
    missing_dirs = check_required_directories(project_root, structure)
    misplaced_files = find_misplaced_files(project_root)

    if not (missing_dirs or misplaced_files):
        return True, "✅ Project structure follows standard organization"

    # The report is assembled from pieces and joined once at the end.
    report = ["❌ Project structure violations found:\n\n"]

    if missing_dirs:
        report.append("Missing required directories:\n")
        report.extend(f" - {dir_name}\n" for dir_name in missing_dirs)
        report.append("\n")

    if misplaced_files:
        report.append("Misplaced files:\n")
        for file_path, target, reason in misplaced_files:
            report.append(
                f" - {file_path.name} → DELETE ({reason})\n"
                if target == "DELETE"
                else f" - {file_path.name} → {target} ({reason})\n"
            )
        report.append("\n")

    # Validation-only mode: point at the fix command and report failure.
    if not fix:
        report.append("Run with --fix to automatically fix these issues:\n")
        report.append(" python hooks/enforce_file_organization.py --fix")
        return False, "".join(report)

    report.append("Fixing violations...\n\n")

    if missing_dirs:
        create_directory_structure(project_root, structure)
        report.append("✅ Created missing directories\n")

    if misplaced_files:
        fix_file_organization(project_root, misplaced_files)
        report.append(f"✅ Moved {len(misplaced_files)} files to correct locations\n")

    report.append("\n✅ Project structure now follows standard organization")
    return True, "".join(report)
|
|
|
|
|
|
def main() -> int:
    """Run the validation from the command line.

    Passing ``--fix`` anywhere in the arguments switches to fix mode.

    Returns:
        0 when the structure is valid (or was fixed), 1 otherwise.
    """
    print("🔍 Validating project structure...\n")

    is_valid, message = validate_structure(
        get_project_root(),
        fix="--fix" in sys.argv,
    )

    print(message)
    print()

    if is_valid:
        print("✅ Structure validation PASSED")
        return 0

    # On failure, remind the user what the expected layout looks like.
    failure_lines = (
        "❌ Structure validation FAILED",
        "\nStandard structure:",
        " src/ - Source code",
        " tests/ - Tests (unit/, integration/, uat/)",
        " docs/ - Documentation",
        " scripts/ - Utility scripts",
        " .claude/ - Claude Code configuration",
    )
    for line in failure_lines:
        print(line)
    return 1
|
|
|
|
|
|
# Script entry point: the return value of main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|