TradingAgents/.claude/lib/orphan_file_cleaner.py

766 lines
26 KiB
Python

#!/usr/bin/env python3
"""
Orphan File Cleaner - Detect and remove orphaned files after marketplace updates
This module provides orphan detection and cleanup to improve the marketplace update
UX by removing files that are no longer part of the plugin after an update.
Features:
- Detect orphaned commands, hooks, and agents in .claude/ subdirectories
- Dry-run mode: Report orphans without deleting (default)
- Confirm mode: Delete only with explicit user approval
- Auto mode: Delete automatically without confirmation
- Security: Whitelist validation, audit logging
- Clear reporting of cleanup operations
Security:
- All file paths validated via security_utils.validate_path()
- Only operates within .claude/ subdirectories
- Prevents path traversal (CWE-22)
- Rejects symlink attacks (CWE-59)
- Audit logging for all deletions
Usage:
    from orphan_file_cleaner import OrphanFileCleaner, detect_orphans, cleanup_orphans

    # Detect orphans (read-only)
    orphans = detect_orphans("/path/to/project")
    print(f"Found {len(orphans)} orphaned files")

    # Cleanup, prompting for confirmation before each deletion
    result = cleanup_orphans("/path/to/project", dry_run=False, confirm=True)
    if result.success:
        print(f"Deleted {result.orphans_deleted} files")

    # Low-level API
    cleaner = OrphanFileCleaner("/path/to/project")
    orphans = cleaner.detect_orphans()
    result = cleaner.cleanup_orphans(orphans, dry_run=False)  # delete without prompting
Date: 2025-11-08
Issue: GitHub #50 - Fix Marketplace Update UX
Agent: implementer
Design Patterns:
See library-design-patterns skill for standardized design patterns.
"""
import json
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional, Set
# Import with fallback for both dev (plugins/) and installed (.claude/lib/) environments
try:
from plugins.autonomous_dev.lib.security_utils import validate_path, audit_log
except ImportError:
from security_utils import validate_path, audit_log
@dataclass
class OrphanFile:
    """A file found on disk that plugin.json does not list.

    Attributes:
        path: Location of the orphaned file.
        category: Singular category label ("command", "hook", "agent").
        is_orphan: Orphan marker; defaults to True.
        reason: Human-readable explanation of why the file is considered
            orphaned. When left empty it is derived from ``category``
            right after construction.
    """

    path: Path
    category: str
    is_orphan: bool = True
    reason: str = ""

    def __post_init__(self):
        """Fall back to a generic reason when the caller supplied none."""
        self.reason = self.reason or f"Not listed in plugin.json {self.category}s"
@dataclass
class CleanupResult:
    """Outcome of an orphan cleanup run.

    Attributes:
        orphans_detected: Number of orphans detected.
        orphans_deleted: Number of orphans deleted.
        dry_run: True when the run only reported and deleted nothing.
        errors: Number of errors encountered. The comparisons below treat
            this as an integer.
        orphans: The detected orphan files.
        success: Recomputed in ``__post_init__`` from ``errors`` and
            ``error_message``; any value passed to the constructor is
            overwritten.
        error_message: Optional message describing a failed operation.
        files_removed: Alias for ``orphans_deleted``; whichever of the two
            is non-zero at construction is copied into the other.
    """

    orphans_detected: int = 0
    orphans_deleted: int = 0
    dry_run: bool = True
    errors: int = 0
    orphans: List[OrphanFile] = field(default_factory=list)
    success: bool = True
    error_message: str = ""
    files_removed: int = 0

    def __post_init__(self):
        """Mirror the two deletion counters and derive ``success``."""
        # Only one direction applies: the counter still at zero is filled
        # from the one that was supplied.
        if self.orphans_deleted == 0 and self.files_removed > 0:
            self.orphans_deleted = self.files_removed
        elif self.files_removed == 0 and self.orphans_deleted > 0:
            self.files_removed = self.orphans_deleted

        # Any error count or error message marks the run as failed.
        failed = self.errors != 0 or bool(self.error_message)
        self.success = not failed

    @property
    def summary(self) -> str:
        """Human-readable one-line description of the cleanup outcome.

        Returns:
            Summary message; an error count is appended when ``errors``
            is greater than zero.
        """
        if self.dry_run:
            text = f"Detected {self.orphans_detected} orphaned files (dry-run, no deletions)"
        elif self.orphans_deleted != 0:
            text = f"Deleted {self.orphans_deleted} orphaned files ({self.orphans_detected} detected)"
        else:
            text = f"No orphaned files deleted ({self.orphans_detected} detected)"

        if self.errors > 0:
            return text + f", {self.errors} errors"
        return text

    def summary_message(self) -> str:
        """Method form of ``summary``, kept for backwards compatibility."""
        return self.summary
# The error-handling-patterns skill describes the project-wide exception
# hierarchy convention (Exception -> AutonomousDevError -> domain error ->
# specific error). This class derives directly from Exception.
class OrphanDetectionError(Exception):
    """Raised when orphan detection cannot proceed.

    In this module it signals a missing, unreadable, or rejected plugin.json.
    """
class OrphanFileCleaner:
    """Detect and remove files left behind after marketplace plugin updates.

    A file is considered an orphan when it sits in one of the category
    directories but its filename is not listed under that category in the
    plugin's plugin.json.

    Attributes:
        project_root: Validated, resolved project root path.
        plugin_name: Plugin name to check (default: "autonomous-dev").
        audit_log_file: Project-specific JSON-lines audit log
            (``<project_root>/logs/orphan_cleanup_audit.log``).
    """

    # Categories to check for orphans
    CATEGORIES = ["commands", "hooks", "agents"]

    # Names that are never reported as orphans
    IGNORE_FILES = {"__pycache__", "__init__.py", "__init__.pyc", ".DS_Store"}

    def __init__(self, project_root: Path, plugin_name: str = "autonomous-dev"):
        """Initialize orphan file cleaner.

        Args:
            project_root: Path to project root directory.
            plugin_name: Plugin name (default: "autonomous-dev").

        Raises:
            ValueError: If validate_path() rejects the project root. The
                failure is written to the security audit log first.
        """
        try:
            validated_root = validate_path(project_root, "project root")
            self.project_root = Path(validated_root).resolve()
        except ValueError as e:
            audit_log(
                "orphan_cleanup",
                "failure",
                {
                    "operation": "init",
                    "project_root": str(project_root),
                    "error": str(e),
                },
            )
            raise

        self.plugin_name = plugin_name

        # Project-specific audit log
        self.audit_log_file = self.project_root / "logs" / "orphan_cleanup_audit.log"

    def _write_audit_log(self, operation: str, path: str, category: str, **kwargs):
        """Append one JSON line to the project-specific audit log.

        Args:
            operation: Operation performed (e.g., "delete_orphan").
            path: File path affected.
            category: File category (command, hook, agent, lib).
            **kwargs: Additional metadata merged into the log entry.
        """
        from datetime import datetime

        # Create logs directory if it doesn't exist
        self.audit_log_file.parent.mkdir(parents=True, exist_ok=True)

        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "operation": operation,
            "path": path,
            "category": category,
            "user": os.getenv("USER", "unknown"),
        }
        log_entry.update(kwargs)

        # Explicit encoding so the log does not depend on the platform default
        with open(self.audit_log_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry) + "\n")

    def _read_plugin_json(self) -> dict:
        """Read plugin.json to get the lists of expected files.

        Returns:
            Parsed plugin.json data (a JSON object).

        Raises:
            OrphanDetectionError: If plugin.json is missing, fails path
                validation, cannot be decoded, or its top-level value is
                not a JSON object.
        """
        plugin_json = (
            self.project_root
            / ".claude"
            / "plugins"
            / self.plugin_name
            / "plugin.json"
        )

        if not plugin_json.exists():
            raise OrphanDetectionError(
                f"Plugin not found: {plugin_json}\n"
                f"Expected: plugin.json file for {self.plugin_name}\n"
                f"Hint: Run /sync marketplace first to install plugin"
            )

        # Validate path before reading
        try:
            validated_path = validate_path(plugin_json, "plugin.json")
        except ValueError as e:
            audit_log(
                "orphan_cleanup",
                "security_violation",
                {
                    "operation": "_read_plugin_json",
                    "path": str(plugin_json),
                    "error": str(e),
                },
            )
            raise OrphanDetectionError(f"Security validation failed: {e}") from e

        # Parse JSON
        try:
            with open(validated_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            raise OrphanDetectionError(
                f"Corrupted plugin.json: {plugin_json}\n"
                f"JSON parse error: {e}\n"
                f"Expected: Valid JSON file"
            ) from e

        # Callers use .get() on the result, so anything but an object is unusable
        if not isinstance(data, dict):
            raise OrphanDetectionError(
                f"Corrupted plugin.json: {plugin_json}\n"
                f"Expected: JSON object at top level"
            )
        return data

    def _get_expected_files(self, category: str, plugin_data: dict) -> Set[str]:
        """Get the set of expected filenames for a category from plugin.json.

        Entries are reduced to their final path component because orphan
        detection compares against ``Path.name`` of the files on disk. A
        single string value is treated as a one-element list rather than
        being split into characters. Empty and non-string entries are
        ignored.

        Args:
            category: Category name ("commands", "hooks", "agents").
            plugin_data: Parsed plugin.json data.

        Returns:
            Set of expected filenames for this category (empty when the
            category is missing or empty).
        """
        entries = plugin_data.get(category) or []
        if isinstance(entries, str):
            entries = [entries]
        return {
            Path(entry).name
            for entry in entries
            if isinstance(entry, str) and entry
        }

    def _get_actual_files(self, category: str) -> List[Path]:
        """Get the files currently present in a category directory.

        Args:
            category: Category name ("commands", "hooks", "agents").

        Returns:
            Sorted list of regular files directly inside the category
            directory, excluding names in IGNORE_FILES. Empty when the
            directory does not exist.

        Note:
            Commands and hooks are in .claude/commands/ and .claude/hooks/.
            Agents are in .claude/plugins/<plugin_name>/agents/.
        """
        if category == "agents":
            category_dir = (
                self.project_root
                / ".claude"
                / "plugins"
                / self.plugin_name
                / category
            )
        else:
            category_dir = self.project_root / ".claude" / category

        # is_dir() also covers "path exists but is not a directory"
        if not category_dir.is_dir():
            return []

        return sorted(
            entry
            for entry in category_dir.iterdir()
            if entry.name not in self.IGNORE_FILES and entry.is_file()
        )

    def find_duplicate_libs(self) -> List[Path]:
        """Find Python files in the legacy .claude/lib/ directory.

        Files here duplicate the libraries whose canonical location is
        plugins/autonomous-dev/lib/ and can cause import conflicts.

        Returns:
            Sorted list of ``*.py`` files found recursively under
            .claude/lib/, excluding ``__init__.py`` and anything inside a
            ``__pycache__`` directory. Empty when .claude/lib/ does not
            exist or holds no such files.

        Example:
            >>> cleaner = OrphanFileCleaner(project_root)
            >>> duplicates = cleaner.find_duplicate_libs()
            >>> print(f"Found {len(duplicates)} duplicate libraries")
        """
        lib_dir = self.project_root / ".claude" / "lib"
        if not lib_dir.is_dir():
            return []

        return sorted(
            py_file
            for py_file in lib_dir.rglob("*.py")
            # Match on path components below lib_dir so an unrelated
            # "__pycache__" substring elsewhere in the path is not a hit
            if "__pycache__" not in py_file.relative_to(lib_dir).parts
            and py_file.name != "__init__.py"
        )

    def pre_install_cleanup(self) -> CleanupResult:
        """Remove .claude/lib/ before installation to prevent duplicates.

        Returns:
            CleanupResult with success status, files_removed count, and
            error_message. Errors are reported through the result and are
            not raised.

        Note:
            - Safe to call when .claude/lib/ doesn't exist (returns success)
            - A symlink at .claude/lib is unlinked without touching its
              target and counts as zero files removed
            - Operations are written to the audit log

        Example:
            >>> cleaner = OrphanFileCleaner(project_root)
            >>> result = cleaner.pre_install_cleanup()
            >>> if result.success:
            ...     print(f"Removed {result.files_removed} duplicate files")
        """
        import shutil

        lib_dir = self.project_root / ".claude" / "lib"
        # Initialised up front so the error handlers can always report it
        file_count = 0

        # exists() follows symlinks and is False for a dangling link, so
        # is_symlink() is checked too in order to still clean that case up
        if not lib_dir.exists() and not lib_dir.is_symlink():
            return CleanupResult(
                orphans_detected=0,
                orphans_deleted=0,
                dry_run=False,
                errors=0,
                success=True,
                error_message="",
            )

        try:
            if lib_dir.is_symlink():
                # Remove the link itself, never its target. validate_path
                # is skipped here because it rejects symlinks (CWE-59
                # protection), per the original implementation note.
                lib_dir.unlink()

                # Log success only once the unlink has actually happened
                audit_log(
                    "orphan_cleanup",
                    "success",
                    {
                        "operation": "pre_install_cleanup",
                        "path": str(lib_dir),
                        "type": "symlink",
                        "files_removed": 0,
                    },
                )
                return CleanupResult(
                    orphans_detected=0,
                    orphans_deleted=0,
                    dry_run=False,
                    errors=0,
                    success=True,
                    error_message="",
                )

            # Regular directory: validate path before removal
            try:
                validated_path = validate_path(lib_dir, ".claude/lib directory")
            except ValueError as e:
                audit_log(
                    "orphan_cleanup",
                    "security_violation",
                    {
                        "operation": "pre_install_cleanup",
                        "path": str(lib_dir),
                        "error": str(e),
                    },
                )
                return CleanupResult(
                    orphans_detected=0,
                    orphans_deleted=0,
                    dry_run=False,
                    errors=1,
                    success=False,
                    error_message=f"Security validation failed: {e}",
                )

            # Count before removal, using the same rules as find_duplicate_libs
            file_count = len(self.find_duplicate_libs())

            # Remove the entire .claude/lib/ directory
            shutil.rmtree(validated_path)

            audit_log(
                "orphan_cleanup",
                "success",
                {
                    "operation": "pre_install_cleanup",
                    "path": str(lib_dir),
                    "files_removed": file_count,
                },
            )
            self._write_audit_log(
                operation="pre_install_cleanup",
                path=str(lib_dir),
                category="lib",
                files_removed=file_count,
                status="removed",
            )

            return CleanupResult(
                orphans_detected=file_count,
                orphans_deleted=file_count,
                dry_run=False,
                errors=0,
                success=True,
                error_message="",
            )

        except PermissionError as e:
            audit_log(
                "orphan_cleanup",
                "permission_denied",
                {
                    "operation": "pre_install_cleanup",
                    "path": str(lib_dir),
                    "error": str(e),
                },
            )
            return CleanupResult(
                orphans_detected=file_count,
                orphans_deleted=0,
                dry_run=False,
                errors=1,
                success=False,
                error_message=f"Permission denied: {e}",
            )

        except Exception as e:
            # Boundary handler: this method reports failures via the result
            audit_log(
                "orphan_cleanup",
                "failure",
                {
                    "operation": "pre_install_cleanup",
                    "path": str(lib_dir),
                    "error": str(e),
                },
            )
            return CleanupResult(
                orphans_detected=file_count,
                orphans_deleted=0,
                dry_run=False,
                errors=1,
                success=False,
                error_message=str(e),
            )

    def detect_orphans(self) -> List[OrphanFile]:
        """Detect orphaned files in all categories.

        Returns:
            List of OrphanFile objects, one per file whose name is not
            listed under its category in plugin.json.

        Raises:
            OrphanDetectionError: If plugin.json cannot be read.
        """
        plugin_data = self._read_plugin_json()

        orphans = []
        for category in self.CATEGORIES:
            expected_files = self._get_expected_files(category, plugin_data)
            # "commands" -> "command"
            singular = category[:-1] if category.endswith("s") else category

            for file_path in self._get_actual_files(category):
                if file_path.name not in expected_files:
                    orphans.append(
                        OrphanFile(
                            path=file_path,
                            category=singular,
                            reason=f"Not listed in plugin.json {category}",
                        )
                    )

        return orphans

    def cleanup_orphans(
        self,
        orphans: Optional[List[OrphanFile]] = None,
        dry_run: Optional[bool] = None,
        confirm: bool = False,
        input_func=None,
    ) -> CleanupResult:
        """Cleanup orphaned files.

        Args:
            orphans: OrphanFile objects to clean up (auto-detected if None).
            dry_run: Report only, without deleting. When None it is derived
                from ``confirm``: confirm=True deletes with prompts,
                confirm=False reports only.
            confirm: Prompt before each deletion; only an answer of "y"
                (case-insensitive, surrounding whitespace ignored) deletes.
            input_func: Prompt function, injectable for testing
                (default: built-in input).

        Returns:
            CleanupResult whose ``errors``, ``success``, ``orphans_deleted``
            and ``files_removed`` reflect what actually happened. Per-file
            failures are counted and audit-logged, not raised.

        Raises:
            OrphanDetectionError: Only when ``orphans`` is None and
                auto-detection fails.
        """
        if orphans is None:
            orphans = self.detect_orphans()

        if input_func is None:
            input_func = input

        # Unspecified dry_run: confirm=True means "delete with prompts",
        # otherwise stay on the safe report-only default
        effective_dry_run = (not confirm) if dry_run is None else dry_run

        result = CleanupResult(
            orphans_detected=len(orphans),
            orphans_deleted=0,
            dry_run=effective_dry_run,
            errors=0,
            orphans=orphans,
        )

        # Dry-run mode: just report, don't delete
        if effective_dry_run:
            return result

        error_count = 0
        for orphan in orphans:
            try:
                # Confirm mode: ask user before deleting
                if confirm:
                    response = input_func(
                        f"Delete orphaned {orphan.category} '{orphan.path.name}'? (y/n): "
                    )
                    if response.strip().lower() != "y":
                        continue

                # Validate path before deletion (security)
                try:
                    validated_path = validate_path(orphan.path, "orphan file")
                except ValueError as e:
                    audit_log(
                        "orphan_cleanup",
                        "security_violation",
                        {
                            "operation": "delete_orphan",
                            "path": str(orphan.path),
                            "error": str(e),
                        },
                    )
                    error_count += 1
                    continue

                Path(validated_path).unlink()
                # Count right away: the file is gone even if the audit
                # logging below fails
                result.orphans_deleted += 1

                # Audit log deletion (global security log and project log)
                audit_log(
                    "orphan_cleanup",
                    "success",
                    {
                        "operation": "delete_orphan",
                        "path": str(orphan.path),
                        "category": orphan.category,
                        "dry_run": effective_dry_run,
                        "confirm": confirm,
                    },
                )
                self._write_audit_log(
                    operation="delete_orphan",
                    path=str(orphan.path),
                    category=orphan.category,
                    reason=orphan.reason,
                    status="deleted",
                )

            except PermissionError as e:
                error_count += 1
                audit_log(
                    "orphan_cleanup",
                    "permission_denied",
                    {
                        "operation": "delete_orphan",
                        "path": str(orphan.path),
                        "category": orphan.category,
                        "error": str(e),
                    },
                )

            except Exception as e:
                # Keep going: one bad file must not abort the whole cleanup
                error_count += 1
                audit_log(
                    "orphan_cleanup",
                    "failure",
                    {
                        "operation": "delete_orphan",
                        "path": str(orphan.path),
                        "category": orphan.category,
                        "error": str(e),
                    },
                )

        # CleanupResult derives success/files_removed only at construction,
        # so refresh them after mutating the counters
        result.errors = error_count
        result.files_removed = result.orphans_deleted
        result.success = error_count == 0 and not result.error_message
        return result
def detect_orphans(
    project_root: str,
    plugin_name: str = "autonomous-dev",
) -> List[OrphanFile]:
    """Detect orphaned files in a project (module-level convenience wrapper).

    Builds an OrphanFileCleaner for the project and returns the result of
    its detect_orphans() method.

    Args:
        project_root: Path to project root directory.
        plugin_name: Plugin name (default: "autonomous-dev").

    Returns:
        List of OrphanFile objects for detected orphans.

    Raises:
        ValueError: If the path fails security validation.
        OrphanDetectionError: If plugin.json is not found or detection fails.

    Example:
        >>> orphans = detect_orphans("/path/to/project")
        >>> print(f"Found {len(orphans)} orphaned files")
    """
    return OrphanFileCleaner(Path(project_root), plugin_name).detect_orphans()
def cleanup_orphans(
    project_root: str,
    dry_run: bool = True,
    confirm: bool = False,
    plugin_name: str = "autonomous-dev",
) -> CleanupResult:
    """Clean up orphaned files in a project (module-level convenience wrapper).

    Detects orphans with a fresh OrphanFileCleaner, then hands them to its
    cleanup_orphans() method together with the given flags.

    Args:
        project_root: Path to project root directory.
        dry_run: Only report, without deleting (default: True).
        confirm: Prompt for confirmation before each deletion
            (default: False = no prompts).
        plugin_name: Plugin name (default: "autonomous-dev").

    Returns:
        CleanupResult with cleanup outcome.

    Raises:
        ValueError: If the path fails security validation.
        OrphanDetectionError: If plugin.json is not found or detection fails.

    Example:
        >>> result = cleanup_orphans("/path/to/project", dry_run=False, confirm=False)
        >>> if result.success:
        ...     print(f"Deleted {result.orphans_deleted} files")
    """
    worker = OrphanFileCleaner(Path(project_root), plugin_name)
    found = worker.detect_orphans()
    return worker.cleanup_orphans(found, dry_run=dry_run, confirm=confirm)