#!/usr/bin/env python3
"""
File Discovery Engine - Comprehensive plugin file discovery for installation

This module provides comprehensive file discovery for plugin installation,
ensuring 100% file coverage (all 201+ files) instead of the current ~76% (152 files).

Key Features:
- Recursive directory traversal (finds all files, not just *.md)
- Intelligent exclusion patterns (cache, build artifacts, hidden files)
- Nested skill structure support (skills/[name].skill/docs/...)
- Installation manifest generation
- Coverage validation

Current Problem:
- install.sh uses shallow glob patterns (*.md) - misses Python files
- Only copies ~152 of 201 files (76% coverage)
- Missing: All 9 scripts/, 23 of 48 lib/ files, 3 agent implementations

Solution:
- Comprehensive recursive file discovery
- Structured copy preserving directory hierarchy
- Validation to detect missing files

Usage:
    from file_discovery import FileDiscovery

    # Discover all files
    discovery = FileDiscovery(plugin_dir)
    files = discovery.discover_all_files()  # Returns list of Path objects

    # Generate manifest
    manifest = discovery.generate_manifest()

    # Validate against manifest
    missing = discovery.validate_against_manifest(manifest)

Date: 2025-11-17
Issue: GitHub #80 (Bootstrap overhaul - 100% file coverage)
Agent: implementer

Design Patterns:
    See library-design-patterns skill for standardized design patterns.
    See file-organization skill for directory structure patterns.
"""
|
|
|
|
import fnmatch
import json
from pathlib import Path
from typing import List, Dict, Any

# Security utilities for path validation and audit logging
try:
    from plugins.autonomous_dev.lib.security_utils import validate_path, audit_log
except ImportError:
    from security_utils import validate_path, audit_log
|
|
|
|
|
|
# Exclusion patterns for file discovery
|
|
# Exclusion patterns for file discovery.
# Entries without a wildcard are exact names (matched against directory names
# and file names); entries with a wildcard are shell-style globs matched
# against file names.
EXCLUDE_PATTERNS = {
    # Cache and build artifacts
    "__pycache__",
    "*.pyc",
    "*.pyo",
    "*.pyd",
    ".pytest_cache",
    "*.egg-info",
    ".eggs",
    "*.egg",
    "build",
    "dist",

    # Version control
    ".git",
    ".gitignore",
    ".gitattributes",

    # IDE and editor files
    ".vscode",
    ".idea",
    "*.swp",
    "*.swo",
    ".DS_Store",

    # Temporary files
    "*.tmp",
    "*.bak",
    "*.log",
    "*~",
}

# Directory patterns to exclude. A directory component matches when its name
# equals the pattern, or - for dotted patterns such as ".egg-info" - when its
# name ends with the pattern (e.g. "mypkg.egg-info"). Plain words such as
# "build" are NOT matched as substrings, so "rebuild/" or "build_utils.py"
# are kept.
EXCLUDE_DIR_PATTERNS = [
    ".egg-info",
    "__pycache__",
    ".pytest_cache",
    ".git",
    ".eggs",
    "build",
    "dist",
]

# Hidden files to INCLUDE (exceptions to hidden file exclusion)
INCLUDE_HIDDEN = {
    ".env.example",
}


class FileDiscovery:
    """Comprehensive file discovery for plugin installation.

    Discovers all files in plugin directory with intelligent exclusions,
    supporting nested structures (skills, lib, scripts, etc.).

    Attributes:
        plugin_dir: Path to plugin directory (e.g., plugins/autonomous-dev/)

    Examples:
        >>> discovery = FileDiscovery(plugin_dir)
        >>> files = discovery.discover_all_files()
        >>> print(f"Found {len(files)} files")
        >>> manifest = discovery.generate_manifest()
    """

    def __init__(self, plugin_dir: Path):
        """Initialize file discovery for plugin directory.

        The directory is resolved, passed through ``validate_path`` and the
        initialization is recorded via ``audit_log``.

        Args:
            plugin_dir: Path to plugin directory

        Raises:
            FileNotFoundError: If plugin directory doesn't exist
                (as reported by ``validate_path``).
            ValueError: If path validation fails (path traversal, symlink)
                (as reported by ``validate_path``).
        """
        # Validate plugin directory path (prevents CWE-22, CWE-59)
        # NOTE(review): resolve() follows symlinks before validate_path sees
        # the path, so a symlink check inside validate_path may never fire
        # for the plugin directory itself - confirm against security_utils.
        self.plugin_dir = validate_path(
            Path(plugin_dir).resolve(),
            purpose="plugin directory",
            allow_missing=False,
        )

        # Audit log initialization
        audit_log("file_discovery", "initialized", {
            "plugin_dir": str(self.plugin_dir)
        })

    def discover_all_files(self) -> List[Path]:
        """Discover all files in plugin directory recursively.

        Directories are skipped, symlinks are skipped (and audit-logged),
        and anything rejected by ``_should_exclude`` is dropped.

        Returns:
            Sorted list of Path objects (rooted at ``self.plugin_dir``) for
            all discovered files.

        Raises:
            FileNotFoundError: If plugin directory doesn't exist

        Examples:
            >>> files = discovery.discover_all_files()
            >>> for file in files:
            ...     print(file.relative_to(plugin_dir))
        """
        if not self.plugin_dir.exists():
            raise FileNotFoundError(
                f"Plugin directory not found: {self.plugin_dir}\n"
                "Expected structure: plugins/autonomous-dev/"
            )

        files = []

        # Recursively walk directory tree
        for path in self.plugin_dir.rglob("*"):
            # Skip directories (we only want files)
            if path.is_dir():
                continue

            # Security: Skip symlinks to prevent CWE-59 (Symlink Following)
            if path.is_symlink():
                audit_log("file_discovery", "skipped_symlink", {
                    "path": str(path),
                    "reason": "Symlinks not allowed in plugin distribution"
                })
                continue

            # Skip if matches exclusion pattern
            if self._should_exclude(path):
                continue

            files.append(path)

        return sorted(files)  # Sort for deterministic ordering

    @staticmethod
    def _is_excluded_dir_name(name: str) -> bool:
        """Return True if a directory component named *name* is excluded."""
        # Exact directory names (build, dist, __pycache__, .git, ...)
        if name in EXCLUDE_PATTERNS:
            return True

        # Hidden directories (except allowed)
        if name.startswith(".") and name not in INCLUDE_HIDDEN:
            return True

        # Directory patterns: exact name, or suffix match for dotted patterns
        # (".egg-info" matches "mypkg.egg-info"). A plain substring test
        # would also drop unrelated directories such as "rebuild" or
        # "distributed", so it is deliberately not used.
        return any(
            name == pattern
            or (pattern.startswith(".") and name.endswith(pattern))
            for pattern in EXCLUDE_DIR_PATTERNS
        )

    @staticmethod
    def _is_excluded_file_name(name: str) -> bool:
        """Return True if a file named *name* is excluded."""
        # Hidden files (except allowed)
        if name.startswith(".") and name not in INCLUDE_HIDDEN:
            return True

        # Exact names and shell-style globs (*.pyc, *~, ...). fnmatchcase is
        # used so matching stays case-sensitive on every platform.
        return any(
            fnmatch.fnmatchcase(name, pattern) for pattern in EXCLUDE_PATTERNS
        )

    def _should_exclude(self, path: Path) -> bool:
        """Check if path should be excluded from discovery.

        Exclusion rules:
        - Cache directories (__pycache__, .pytest_cache)
        - Build artifacts (*.pyc, *.egg-info)
        - Version control (.git/)
        - Hidden files (.*) EXCEPT .env.example
        - Temporary files (*.tmp, *.bak)

        Directory rules are applied to every parent component of the path
        relative to ``self.plugin_dir``; file rules are applied to the final
        component only, so a file is never rejected merely because its name
        contains a directory pattern (e.g. ``build_utils.py``).

        Args:
            path: Path to check (must be located under ``self.plugin_dir``)

        Returns:
            True if path should be excluded, False otherwise

        Raises:
            ValueError: If ``path`` is not located under ``self.plugin_dir``.
        """
        parts = path.relative_to(self.plugin_dir).parts

        # Check if in excluded directory (all components except the file name)
        if any(self._is_excluded_dir_name(part) for part in parts[:-1]):
            return True

        # Check file name patterns
        return self._is_excluded_file_name(path.name)

    def count_files(self) -> int:
        """Count total number of files discovered.

        Returns:
            Total file count

        Examples:
            >>> count = discovery.count_files()
            >>> print(f"Total files: {count}")
        """
        return len(self.discover_all_files())

    def generate_manifest(self) -> Dict[str, Any]:
        """Generate installation manifest with file metadata.

        Manifest format:
            {
                "version": "1.0",
                "total_files": 201,
                "files": [
                    {"path": "commands/auto-implement.md", "size": 1234},
                    {"path": "lib/security_utils.py", "size": 5678},
                    ...
                ]
            }

        Returns:
            Manifest dictionary

        Examples:
            >>> manifest = discovery.generate_manifest()
            >>> print(f"Total files: {manifest['total_files']}")
        """
        files = self.discover_all_files()

        return {
            "version": "1.0",
            "total_files": len(files),
            "files": [
                {
                    # as_posix() yields Unix-style separators on Windows
                    # without touching legal backslashes in POSIX file names.
                    "path": file_path.relative_to(self.plugin_dir).as_posix(),
                    "size": file_path.stat().st_size,
                }
                for file_path in files
            ],
        }

    def save_manifest(self, manifest_path: Path) -> None:
        """Save installation manifest to JSON file.

        Args:
            manifest_path: Path to save manifest
                (e.g., config/installation_manifest.json). A plain string is
                accepted as well.

        Examples:
            >>> manifest_path = plugin_dir / "config" / "installation_manifest.json"
            >>> discovery.save_manifest(manifest_path)
        """
        manifest = self.generate_manifest()
        manifest_path = Path(manifest_path)

        # Create parent directory if needed
        manifest_path.parent.mkdir(parents=True, exist_ok=True)

        # Save as formatted JSON; explicit encoding keeps the output
        # independent of the platform's locale default.
        with open(manifest_path, "w", encoding="utf-8") as f:
            json.dump(manifest, f, indent=2)

    def validate_against_manifest(self, manifest: Dict[str, Any]) -> List[str]:
        """Validate current files against installation manifest.

        Detects files that are in manifest but missing from filesystem.

        Args:
            manifest: Installation manifest dictionary (must contain a
                "files" list whose entries have a "path" key)

        Returns:
            Sorted list of missing file paths (relative to plugin_dir)

        Examples:
            >>> missing = discovery.validate_against_manifest(manifest)
            >>> if missing:
            ...     print(f"Missing {len(missing)} files:")
            ...     for file in missing:
            ...         print(f"  - {file}")
        """
        # Same path normalization as generate_manifest so entries compare equal
        current_relative = {
            f.relative_to(self.plugin_dir).as_posix()
            for f in self.discover_all_files()
        }

        expected_files = {entry["path"] for entry in manifest["files"]}

        return sorted(expected_files - current_relative)

    def get_relative_path(self, file_path: Path) -> Path:
        """Get relative path for file (for copying).

        Args:
            file_path: Absolute path to file

        Returns:
            Relative path from plugin_dir

        Raises:
            ValueError: If ``file_path`` is not located under plugin_dir.

        Examples:
            >>> abs_path = plugin_dir / "lib" / "nested" / "utils.py"
            >>> rel_path = discovery.get_relative_path(abs_path)
            >>> print(rel_path)  # lib/nested/utils.py
        """
        return Path(file_path).relative_to(self.plugin_dir)
|