TradingAgents/.claude/lib/protected_file_detector.py

317 lines
10 KiB
Python

#!/usr/bin/env python3
"""
Protected File Detector - Detect user artifacts and protected files
This module identifies files that should be protected during installation,
including user-created artifacts, modified plugin files, and sensitive data.
Key Features:
- Always-protected files (.env, PROJECT.md, state files)
- Custom hook detection
- Plugin default comparison (hash-based)
- Glob pattern matching for protected patterns
- File categorization (config, state, custom_hook, modified, new)
Usage:
from protected_file_detector import ProtectedFileDetector
# Initialize with plugin defaults registry
detector = ProtectedFileDetector(plugin_defaults={
".claude/hooks/auto_format.py": "abc123...",
})
# Detect protected files
protected = detector.detect_protected_files(project_dir)
Date: 2025-12-09
Issue: #106 (GenAI-first installation system)
Agent: implementer
Design Patterns:
See library-design-patterns skill for standardized design patterns.
"""
import hashlib
import fnmatch
from pathlib import Path
from typing import List, Dict, Any, Optional
# Security utilities: audit logging helper
try:
from plugins.autonomous_dev.lib.security_utils import audit_log
except ImportError:
from security_utils import audit_log
# Exact project-relative paths that are treated as always-protected
# user artifacts.
ALWAYS_PROTECTED: List[str] = [
    ".claude/PROJECT.md",
    ".env",
    ".env.local",
    ".claude/batch_state.json",
    ".claude/session_state.json",
]

# Glob patterns (evaluated with fnmatch) describing further protected files.
PROTECTED_PATTERNS: List[str] = [
    ".claude/hooks/custom_*.py",  # user-written custom hooks
    "*.env",  # any path ending in ".env"
    "**/*.secret",  # secret files
]
class ProtectedFileDetector:
    """Detect user artifacts and protected files.

    This class identifies files that should be protected during installation,
    including user-created artifacts, modified plugin files, and sensitive data.

    Attributes:
        additional_patterns: Additional glob patterns to protect
        plugin_defaults: Dict mapping relative file paths to the SHA256 hex
            digest of their plugin default content

    Examples:
        >>> detector = ProtectedFileDetector()
        >>> protected = detector.detect_protected_files(project_dir)
        >>> print(f"Found {len(protected)} protected files")
    """

    def __init__(
        self,
        additional_patterns: Optional[List[str]] = None,
        plugin_defaults: Optional[Dict[str, str]] = None
    ):
        """Initialize protected file detector.

        Args:
            additional_patterns: Additional glob patterns to protect
            plugin_defaults: Dict mapping file paths to their default hashes
        """
        self.additional_patterns = additional_patterns or []
        self.plugin_defaults = plugin_defaults or {}

        # Record the initialization (counts only, not the values) in the
        # audit log.
        audit_log("protected_file_detector", "initialized", {
            "additional_patterns": len(self.additional_patterns),
            "plugin_defaults": len(self.plugin_defaults)
        })

    def get_protected_patterns(self) -> List[str]:
        """Get all protected file patterns.

        Returns:
            New list containing the always-protected paths, the built-in
            protected patterns and the additional patterns, in that order
        """
        # Unpacking (rather than ``+``) also accepts additional_patterns
        # given as a tuple or other iterable.
        return [*ALWAYS_PROTECTED, *PROTECTED_PATTERNS, *self.additional_patterns]

    def has_plugin_default(self, file_path: str) -> bool:
        """Check if file has a known plugin default.

        Args:
            file_path: Relative file path

        Returns:
            True if file has plugin default registered
        """
        return file_path in self.plugin_defaults

    # The annotation is quoted so that ``Path | str`` is not evaluated at
    # definition time, which raises TypeError on Python < 3.10.
    def detect_protected_files(self, project_dir: "Path | str") -> List[Dict[str, Any]]:
        """Detect all protected files in project directory.

        Args:
            project_dir: Project directory to scan

        Returns:
            List of protected file dicts, ordered by path, each with:
            - path: Relative path from project dir (forward slashes)
            - category: Type of protected file (modified, new, custom_hook,
              state, config)
            - modified: True if modified from plugin default
            - reason: Why file is protected
            An empty list is returned when the directory does not exist.

        Examples:
            >>> detector = ProtectedFileDetector()
            >>> protected = detector.detect_protected_files(project_dir)
            >>> for f in protected:
            ...     print(f"{f['path']} - {f['reason']}")
        """
        project_path = Path(project_dir).resolve()

        # Return empty list if project directory doesn't exist
        if not project_path.exists():
            return []

        protected_files: List[Dict[str, Any]] = []

        # rglob order depends on the filesystem; sort so that the result is
        # deterministic across runs and platforms.
        for file_path in sorted(project_path.rglob("*")):
            # Skip directories
            if file_path.is_dir():
                continue

            # Get relative path from project dir, always with "/" separators
            try:
                relative_str = file_path.relative_to(project_path).as_posix()
            except ValueError:
                continue

            # Check if file is protected
            protection_info = self._check_protection(relative_str, file_path)
            if protection_info:
                protected_files.append({
                    "path": relative_str,
                    **protection_info
                })

        return protected_files

    @staticmethod
    def _matches_glob(file_path: str, pattern: str) -> bool:
        """Match one relative path against one glob pattern.

        fnmatch gives ``**`` no special meaning, so a pattern such as
        ``**/*.secret`` would require at least one ``/`` in the path and
        miss files in the project root. A leading ``**/`` is therefore also
        allowed to match zero directories.

        Args:
            file_path: Relative file path
            pattern: Glob pattern

        Returns:
            True if the path matches the pattern
        """
        if fnmatch.fnmatch(file_path, pattern):
            return True

        # Retry with each leading "**/" removed (zero-directory case).
        while pattern.startswith("**/"):
            pattern = pattern[3:]
            if fnmatch.fnmatch(file_path, pattern):
                return True

        return False

    def matches_pattern(self, file_path: str) -> bool:
        """Check if file path matches any protected pattern.

        Args:
            file_path: Relative file path

        Returns:
            True if file matches a protected pattern

        Examples:
            >>> detector = ProtectedFileDetector(additional_patterns=["*.env"])
            >>> detector.matches_pattern("production.env")
            True
        """
        return any(
            self._matches_glob(file_path, pattern)
            for pattern in self.get_protected_patterns()
        )

    def calculate_hash(self, file_path: Path) -> str:
        """Calculate SHA256 hash of file.

        Args:
            file_path: Path to file

        Returns:
            SHA256 hex digest

        Raises:
            OSError: If the file cannot be opened or read

        Examples:
            >>> detector = ProtectedFileDetector()
            >>> hash_val = detector.calculate_hash(Path("file.py"))
        """
        sha256 = hashlib.sha256()

        # Read file in chunks to handle large files
        with open(file_path, "rb") as f:
            while chunk := f.read(8192):
                sha256.update(chunk)

        return sha256.hexdigest()

    def matches_plugin_default(self, file_path: Path, relative_path: str) -> bool:
        """Check if file matches its plugin default hash.

        Args:
            file_path: Absolute path to file
            relative_path: Relative path for lookup in plugin_defaults

        Returns:
            True if file hash matches plugin default; False if it differs
            or no default is registered for relative_path

        Examples:
            >>> detector = ProtectedFileDetector(plugin_defaults={
            ...     "hook.py": "abc123..."
            ... })
            >>> detector.matches_plugin_default(Path("hook.py"), "hook.py")
        """
        # Without a registered default there is nothing to compare against
        # (and the file is not read at all).
        default_hash = self.plugin_defaults.get(relative_path)
        if default_hash is None:
            return False

        return self.calculate_hash(file_path) == default_hash

    def _check_protection(self, relative_path: str, full_path: Path) -> Optional[Dict[str, Any]]:
        """Check if file should be protected and categorize it.

        Checks run in this order, and the first that applies wins:
        1. Files with a registered plugin default: protected only when
           their content differs from the default.
        2. Exact always-protected paths.
        3. Protected glob patterns.

        Args:
            relative_path: Relative path from project root
            full_path: Full path to file

        Returns:
            Dict with category, modified and reason keys, or None if the
            file is not protected
        """
        # Check if file is modified from plugin default (check this first)
        if self.has_plugin_default(relative_path):
            if not self.matches_plugin_default(full_path, relative_path):
                return {
                    "category": "modified",
                    "modified": True,
                    "reason": "Modified from plugin default"
                }
            # File matches plugin default, so it's not protected
            return None

        # Check always-protected files (these are user artifacts)
        if relative_path in ALWAYS_PROTECTED:
            # These are always user-created, so category is "new"
            return {
                "category": "new",
                "modified": False,
                "reason": "User artifact (always protected)"
            }

        # Check if file matches protected patterns
        if self.matches_pattern(relative_path):
            # Determine if it's a custom hook
            if "custom_" in relative_path and relative_path.endswith(".py"):
                return {
                    "category": "custom_hook",
                    "modified": False,
                    "reason": "Custom user hook"
                }

            # Other protected patterns - categorize appropriately
            return {
                "category": self._categorize_file(relative_path),
                "modified": False,
                "reason": "Matches protected pattern"
            }

        return None

    def _categorize_file(self, file_path: str) -> str:
        """Categorize protected file type.

        The checks are substring/suffix tests applied in order; the first
        match decides the category.

        Args:
            file_path: Relative file path

        Returns:
            Category string (state, config, custom_hook, modified, new)
        """
        # State files
        if "state.json" in file_path or "batch_" in file_path:
            return "state"

        # Config files
        if file_path.endswith("PROJECT.md") or ".env" in file_path:
            return "config"

        # Custom hooks
        if "custom_" in file_path and file_path.endswith(".py"):
            return "custom_hook"

        # Modified plugin files
        if self.has_plugin_default(file_path):
            return "modified"

        # New user files
        return "new"