317 lines
10 KiB
Python
317 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Protected File Detector - Detect user artifacts and protected files
|
|
|
|
This module identifies files that should be protected during installation,
|
|
including user-created artifacts, modified plugin files, and sensitive data.
|
|
|
|
Key Features:
|
|
- Always-protected files (.env, PROJECT.md, state files)
|
|
- Custom hook detection
|
|
- Plugin default comparison (hash-based)
|
|
- Glob pattern matching for protected patterns
|
|
- File categorization (config, state, custom_hook, modified, new)
|
|
|
|
Usage:
|
|
from protected_file_detector import ProtectedFileDetector
|
|
|
|
# Initialize with plugin defaults registry
|
|
detector = ProtectedFileDetector(plugin_defaults={
|
|
".claude/hooks/auto_format.py": "abc123...",
|
|
})
|
|
|
|
# Detect protected files
|
|
protected = detector.detect_protected_files(project_dir)
|
|
|
|
Date: 2025-12-09
|
|
Issue: #106 (GenAI-first installation system)
|
|
Agent: implementer
|
|
|
|
Design Patterns:
|
|
See library-design-patterns skill for standardized design patterns.
|
|
"""
|
|
|
|
import hashlib
|
|
import fnmatch
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
# Audit logging helper from the shared security utilities
|
|
try:
|
|
from plugins.autonomous_dev.lib.security_utils import audit_log
|
|
except ImportError:
|
|
from security_utils import audit_log
|
|
|
|
|
|
# Exact project-relative paths that are always protected (never overwritten).
ALWAYS_PROTECTED = [
    # Project definition
    ".claude/PROJECT.md",
    # Environment files
    ".env",
    ".env.local",
    # State files
    ".claude/batch_state.json",
    ".claude/session_state.json",
]

# Glob patterns; any project-relative path matching one of them is protected.
PROTECTED_PATTERNS = [
    ".claude/hooks/custom_*.py",  # Custom hooks
    "*.env",  # All .env files
    "**/*.secret",  # Secret files
]
|
|
|
|
|
|
class ProtectedFileDetector:
|
|
"""Detect user artifacts and protected files.
|
|
|
|
This class identifies files that should be protected during installation,
|
|
including user-created artifacts, modified plugin files, and sensitive data.
|
|
|
|
Attributes:
|
|
additional_patterns: Additional glob patterns to protect
|
|
plugin_defaults: Dict mapping file paths to their default hashes
|
|
|
|
Examples:
|
|
>>> detector = ProtectedFileDetector()
|
|
>>> protected = detector.detect_protected_files(project_dir)
|
|
>>> print(f"Found {len(protected)} protected files")
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
additional_patterns: Optional[List[str]] = None,
|
|
plugin_defaults: Optional[Dict[str, str]] = None
|
|
):
|
|
"""Initialize protected file detector.
|
|
|
|
Args:
|
|
additional_patterns: Additional glob patterns to protect
|
|
plugin_defaults: Dict mapping file paths to their default hashes
|
|
"""
|
|
self.additional_patterns = additional_patterns or []
|
|
self.plugin_defaults = plugin_defaults or {}
|
|
|
|
# Audit log initialization
|
|
audit_log("protected_file_detector", "initialized", {
|
|
"additional_patterns": len(self.additional_patterns),
|
|
"plugin_defaults": len(self.plugin_defaults)
|
|
})
|
|
|
|
def get_protected_patterns(self) -> List[str]:
|
|
"""Get all protected file patterns.
|
|
|
|
Returns:
|
|
List of glob patterns for protected files
|
|
"""
|
|
return ALWAYS_PROTECTED + PROTECTED_PATTERNS + self.additional_patterns
|
|
|
|
def has_plugin_default(self, file_path: str) -> bool:
|
|
"""Check if file has a known plugin default.
|
|
|
|
Args:
|
|
file_path: Relative file path
|
|
|
|
Returns:
|
|
True if file has plugin default registered
|
|
"""
|
|
return file_path in self.plugin_defaults
|
|
|
|
def detect_protected_files(self, project_dir: Path | str) -> List[Dict[str, Any]]:
|
|
"""Detect all protected files in project directory.
|
|
|
|
Args:
|
|
project_dir: Project directory to scan
|
|
|
|
Returns:
|
|
List of protected file dicts with:
|
|
- path: Relative path from project dir
|
|
- category: Type of protected file (config, state, custom_hook, modified_plugin)
|
|
- modified: True if modified from plugin default
|
|
- reason: Why file is protected
|
|
|
|
Examples:
|
|
>>> detector = ProtectedFileDetector()
|
|
>>> protected = detector.detect_protected_files(project_dir)
|
|
>>> for f in protected:
|
|
... print(f"{f['path']} - {f['reason']}")
|
|
"""
|
|
project_path = Path(project_dir) if isinstance(project_dir, str) else project_dir
|
|
project_path = project_path.resolve()
|
|
|
|
# Return empty list if project directory doesn't exist
|
|
if not project_path.exists():
|
|
return []
|
|
|
|
protected_files = []
|
|
|
|
# Scan project directory for files
|
|
for file_path in project_path.rglob("*"):
|
|
# Skip directories
|
|
if file_path.is_dir():
|
|
continue
|
|
|
|
# Get relative path from project dir
|
|
try:
|
|
relative_path = file_path.relative_to(project_path)
|
|
relative_str = str(relative_path).replace("\\", "/")
|
|
except ValueError:
|
|
continue
|
|
|
|
# Check if file is protected
|
|
protection_info = self._check_protection(relative_str, file_path)
|
|
if protection_info:
|
|
protected_files.append({
|
|
"path": relative_str,
|
|
**protection_info
|
|
})
|
|
|
|
return protected_files
|
|
|
|
def matches_pattern(self, file_path: str) -> bool:
|
|
"""Check if file path matches any protected pattern.
|
|
|
|
Args:
|
|
file_path: Relative file path
|
|
|
|
Returns:
|
|
True if file matches a protected pattern
|
|
|
|
Examples:
|
|
>>> detector = ProtectedFileDetector(additional_patterns=["*.env"])
|
|
>>> detector.matches_pattern("production.env")
|
|
True
|
|
"""
|
|
all_patterns = self.get_protected_patterns()
|
|
|
|
for pattern in all_patterns:
|
|
# Use fnmatch for glob pattern matching
|
|
if fnmatch.fnmatch(file_path, pattern):
|
|
return True
|
|
|
|
return False
|
|
|
|
def calculate_hash(self, file_path: Path) -> str:
|
|
"""Calculate SHA256 hash of file.
|
|
|
|
Args:
|
|
file_path: Path to file
|
|
|
|
Returns:
|
|
SHA256 hex digest
|
|
|
|
Examples:
|
|
>>> detector = ProtectedFileDetector()
|
|
>>> hash_val = detector.calculate_hash(Path("file.py"))
|
|
"""
|
|
sha256 = hashlib.sha256()
|
|
|
|
# Read file in chunks to handle large files
|
|
with open(file_path, "rb") as f:
|
|
while chunk := f.read(8192):
|
|
sha256.update(chunk)
|
|
|
|
return sha256.hexdigest()
|
|
|
|
def matches_plugin_default(self, file_path: Path, relative_path: str) -> bool:
|
|
"""Check if file matches its plugin default hash.
|
|
|
|
Args:
|
|
file_path: Absolute path to file
|
|
relative_path: Relative path for lookup in plugin_defaults
|
|
|
|
Returns:
|
|
True if file hash matches plugin default
|
|
|
|
Examples:
|
|
>>> detector = ProtectedFileDetector(plugin_defaults={
|
|
... "hook.py": "abc123..."
|
|
... })
|
|
>>> detector.matches_plugin_default(Path("hook.py"), "hook.py")
|
|
"""
|
|
# Check if we have a default hash for this file
|
|
if relative_path not in self.plugin_defaults:
|
|
return False
|
|
|
|
# Calculate current file hash
|
|
current_hash = self.calculate_hash(file_path)
|
|
|
|
# Compare with default hash
|
|
return current_hash == self.plugin_defaults[relative_path]
|
|
|
|
def _check_protection(self, relative_path: str, full_path: Path) -> Optional[Dict[str, Any]]:
|
|
"""Check if file should be protected and categorize it.
|
|
|
|
Args:
|
|
relative_path: Relative path from project root
|
|
full_path: Full path to file
|
|
|
|
Returns:
|
|
Dict with protection info or None if not protected
|
|
"""
|
|
# Check if file is modified from plugin default (check this first)
|
|
if self.has_plugin_default(relative_path):
|
|
if not self.matches_plugin_default(full_path, relative_path):
|
|
return {
|
|
"category": "modified",
|
|
"modified": True,
|
|
"reason": "Modified from plugin default"
|
|
}
|
|
# File matches plugin default, so it's not protected
|
|
return None
|
|
|
|
# Check always-protected files (these are user artifacts)
|
|
if relative_path in ALWAYS_PROTECTED:
|
|
# These are always user-created, so category is "new"
|
|
return {
|
|
"category": "new",
|
|
"modified": False,
|
|
"reason": "User artifact (always protected)"
|
|
}
|
|
|
|
# Check if file matches protected patterns
|
|
if self.matches_pattern(relative_path):
|
|
# Determine if it's a custom hook
|
|
if "custom_" in relative_path and relative_path.endswith(".py"):
|
|
return {
|
|
"category": "custom_hook",
|
|
"modified": False,
|
|
"reason": "Custom user hook"
|
|
}
|
|
|
|
# Other protected patterns - categorize appropriately
|
|
category = self._categorize_file(relative_path)
|
|
return {
|
|
"category": category,
|
|
"modified": False,
|
|
"reason": f"Matches protected pattern"
|
|
}
|
|
|
|
return None
|
|
|
|
def _categorize_file(self, file_path: str) -> str:
|
|
"""Categorize protected file type.
|
|
|
|
Args:
|
|
file_path: Relative file path
|
|
|
|
Returns:
|
|
Category string (config, state, custom_hook, modified, new)
|
|
"""
|
|
# State files
|
|
if "state.json" in file_path or "batch_" in file_path:
|
|
return "state"
|
|
|
|
# Config files
|
|
if file_path.endswith("PROJECT.md") or ".env" in file_path:
|
|
return "config"
|
|
|
|
# Custom hooks
|
|
if "custom_" in file_path and file_path.endswith(".py"):
|
|
return "custom_hook"
|
|
|
|
# Modified plugin files
|
|
if self.has_plugin_default(file_path):
|
|
return "modified"
|
|
|
|
# New user files
|
|
return "new"
|