#!/usr/bin/env python3
"""
Install Audit - Audit logging for GenAI-first installation system

This module provides audit trail logging for installation operations,
tracking protected files, conflicts, resolutions, and outcomes.

Key Features:
- JSONL format audit logs (one JSON per line)
- Installation attempt tracking with unique IDs
- Protected file recording
- Conflict tracking and resolution logging
- Report generation from audit trail
- Crash-resistant (append-only, recoverable)

Usage:
    from install_audit import InstallAudit

    # Start installation
    audit = InstallAudit(Path.home() / ".autonomous-dev" / "install_audit.jsonl")
    install_id = audit.start_installation("fresh")

    # Log events
    audit.record_protected_file(install_id, ".env", "secrets")
    audit.log_success(install_id, files_copied=42)

    # Generate report
    report = audit.generate_report(install_id)

Date: 2025-12-09
Issue: #106 (GenAI-first installation system)
Agent: implementer

Design Patterns:
    See library-design-patterns skill for standardized design patterns.
"""

import json
import uuid
from pathlib import Path
from typing import Dict, Any, Iterator, List, Optional, Union
from datetime import datetime, timezone

# Security utilities
try:
    from plugins.autonomous_dev.lib.security_utils import audit_log
except ImportError:
    from security_utils import audit_log


def _utc_timestamp() -> str:
    """Return the current UTC time as a naive ISO 8601 string suffixed with "Z".

    ``datetime.utcnow()`` is deprecated since Python 3.12, so the time is taken
    as an aware UTC datetime and the tzinfo is dropped before formatting. This
    keeps the on-disk format (``YYYY-MM-DDTHH:MM:SS.ffffffZ``) unchanged.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"


class AuditEntry:
    """Audit log entry data class."""

    def __init__(
        self,
        event: str,
        install_id: str,
        timestamp: Optional[str] = None,
        **kwargs
    ):
        """Initialize audit entry.

        Args:
            event: Event type (installation_start, protected_file, etc.)
            install_id: Unique installation ID
            timestamp: ISO 8601 timestamp (auto-generated if None)
            **kwargs: Additional event-specific data
        """
        self.event = event
        self.install_id = install_id
        self.timestamp = timestamp or _utc_timestamp()
        self.data = kwargs

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            "event": self.event,
            "install_id": self.install_id,
            "timestamp": self.timestamp,
            **self.data
        }


class InstallAudit:
    """Audit logging for installation operations.

    This class provides append-only audit logging in JSONL format,
    tracking all installation events for security and debugging.

    Attributes:
        audit_file: Path to audit log file (JSONL format)

    Examples:
        >>> audit = InstallAudit(Path("install_audit.jsonl"))
        >>> install_id = audit.start_installation("fresh")
        >>> audit.log_success(install_id, files_copied=42)
    """

    def __init__(self, audit_file: Union[Path, str]):
        """Initialize audit logger.

        Args:
            audit_file: Path to audit log file

        Note:
            Parent directories are created automatically. The log file itself
            is created on the first write, which opens it in append mode
            (existing entries are preserved).
        """
        self.audit_file = Path(audit_file)

        # Create parent directories
        self.audit_file.parent.mkdir(parents=True, exist_ok=True)

        # Security audit log
        audit_log("install_audit", "initialized", {
            "audit_file": str(self.audit_file)
        })

    def start_installation(self, install_type: str) -> str:
        """Log installation start and return unique install ID.

        Args:
            install_type: Installation type (fresh, brownfield, upgrade)

        Returns:
            Unique installation ID (UUID)

        Examples:
            >>> audit = InstallAudit(Path("audit.jsonl"))
            >>> install_id = audit.start_installation("fresh")
        """
        install_id = str(uuid.uuid4())

        entry = AuditEntry(
            event="installation_start",
            install_id=install_id,
            install_type=install_type
        )
        self._write_entry(entry)

        return install_id

    def log_success(self, install_id: str, files_copied: int, **kwargs) -> None:
        """Log successful installation completion.

        Args:
            install_id: Installation ID from start_installation()
            files_copied: Number of files copied
            **kwargs: Additional context (files_skipped, files_backed_up, etc.)

        Examples:
            >>> audit.log_success(install_id, files_copied=42, files_skipped=2)
        """
        entry = AuditEntry(
            event="installation_success",
            install_id=install_id,
            files_copied=files_copied,
            **kwargs
        )
        self._write_entry(entry)

    def log_failure(self, install_id: str, error: str, **kwargs) -> None:
        """Log failed installation.

        Args:
            install_id: Installation ID from start_installation()
            error: Error message
            **kwargs: Additional context

        Examples:
            >>> audit.log_failure(install_id, error="Permission denied")
        """
        entry = AuditEntry(
            event="installation_failure",
            install_id=install_id,
            error=error,
            **kwargs
        )
        self._write_entry(entry)

    def record_protected_file(
        self,
        install_id: str,
        file_path: str,
        reason: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> None:
        """Record a protected file.

        Args:
            install_id: Installation ID
            file_path: Relative path to protected file
            reason: Why file is protected
            metadata: Optional additional metadata

        Raises:
            ValueError: If file_path contains ".." or is absolute

        Examples:
            >>> audit.record_protected_file(
            ...     install_id,
            ...     ".env",
            ...     "secrets",
            ...     metadata={"size": 1024}
            ... )
        """
        # Validate path for security
        self._validate_path(file_path)

        entry = AuditEntry(
            event="protected_file",
            install_id=install_id,
            file=file_path,
            reason=reason
        )

        # An empty/None metadata is omitted from the entry entirely.
        if metadata:
            entry.data["metadata"] = metadata

        self._write_entry(entry)

    def record_conflict(
        self,
        install_id: str,
        file_path: str,
        existing_hash: str,
        staging_hash: str,
        **kwargs
    ) -> None:
        """Record a file conflict.

        Args:
            install_id: Installation ID
            file_path: Relative path to conflicting file
            existing_hash: Hash of existing file
            staging_hash: Hash of staging file
            **kwargs: Additional context

        Raises:
            ValueError: If file_path contains ".." or is absolute

        Examples:
            >>> audit.record_conflict(
            ...     install_id,
            ...     "file.py",
            ...     existing_hash="abc",
            ...     staging_hash="def"
            ... )
        """
        self._validate_path(file_path)

        entry = AuditEntry(
            event="conflict",
            install_id=install_id,
            file=file_path,
            existing_hash=existing_hash,
            staging_hash=staging_hash,
            **kwargs
        )
        self._write_entry(entry)

    def record_conflict_resolution(
        self,
        install_id: str,
        file_path: str,
        action: str,
        **kwargs
    ) -> None:
        """Record conflict resolution action.

        Args:
            install_id: Installation ID
            file_path: Relative path to file
            action: Action taken (backup, skip, overwrite)
            **kwargs: Additional context (backup_path, etc.)

        Raises:
            ValueError: If file_path contains ".." or is absolute

        Examples:
            >>> audit.record_conflict_resolution(
            ...     install_id,
            ...     "file.py",
            ...     action="backup",
            ...     backup_path="file.py.bak"
            ... )
        """
        self._validate_path(file_path)

        entry = AuditEntry(
            event="conflict_resolution",
            install_id=install_id,
            file=file_path,
            action=action,
            **kwargs
        )
        self._write_entry(entry)

    def generate_report(self, install_id: str) -> Dict[str, Any]:
        """Generate installation report from audit trail.

        Args:
            install_id: Installation ID to generate report for

        Returns:
            Dict with installation report:
            - install_id: Installation ID
            - status: Status (success, failure, in_progress); when several
              success/failure events exist, the last one in the log wins
            - timeline: Chronological list of events
            - summary: Summary statistics
            - protected_files: List of protected files
            - conflicts: List of conflicts

        Raises:
            ValueError: If install ID not found in audit log

        Examples:
            >>> report = audit.generate_report(install_id)
            >>> print(f"Status: {report['status']}")
        """
        entries = self._read_entries_for_install(install_id)

        if not entries:
            raise ValueError(f"Install ID not found: {install_id}")

        # Parse entries
        status = "in_progress"
        timeline = []
        protected_files = []
        conflicts = []
        stats = {
            "total_protected_files": 0,
            "total_conflicts": 0,
            "files_copied": 0
        }

        for entry_dict in entries:
            # .get(): an entry without an "event" key still belongs in the
            # timeline but must not abort the report with a KeyError.
            event = entry_dict.get("event")
            timeline.append(entry_dict)
            status = self._next_status(status, event)

            if event == "installation_success":
                stats["files_copied"] = entry_dict.get("files_copied", 0)
            elif event == "protected_file":
                protected_files.append(entry_dict["file"])
                stats["total_protected_files"] += 1
            elif event == "conflict":
                conflicts.append(entry_dict["file"])
                stats["total_conflicts"] += 1

        return {
            "install_id": install_id,
            "status": status,
            "timeline": timeline,
            "summary": stats,
            "protected_files": protected_files,
            "conflicts": conflicts
        }

    def export_report(self, install_id: str, report_file: Union[Path, str]) -> None:
        """Export installation report to JSON file.

        Args:
            install_id: Installation ID
            report_file: Path to output report file (parent directories are
                created if missing; an existing file is overwritten)

        Raises:
            ValueError: If install ID not found in audit log

        Examples:
            >>> audit.export_report(install_id, Path("report.json"))
        """
        report = self.generate_report(install_id)

        report_path = Path(report_file)
        report_path.parent.mkdir(parents=True, exist_ok=True)

        with open(report_path, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)

    def get_all_installations(self) -> List[Dict[str, Any]]:
        """Get all installation attempts from audit log.

        Only installations that have an ``installation_start`` entry are
        returned. If an ID has several start entries, the last one supplies
        the reported type and timestamp.

        Returns:
            List of installation info dicts (one per install_id)

        Examples:
            >>> history = audit.get_all_installations()
            >>> print(f"Found {len(history)} installations")
        """
        installations = {}

        for entry in self._iter_entries():
            install_id = entry.get("install_id")
            if not install_id:
                continue

            # Track start entries
            if entry.get("event") == "installation_start":
                installations[install_id] = {
                    "install_id": install_id,
                    "install_type": entry.get("install_type"),
                    "timestamp": entry.get("timestamp")
                }

        return list(installations.values())

    def get_installations_by_status(self, status: str) -> List[Dict[str, Any]]:
        """Get installations filtered by status.

        Args:
            status: Status to filter by (success, failure, in_progress)

        Returns:
            List of installation info dicts matching status

        Examples:
            >>> successful = audit.get_installations_by_status("success")
        """
        installations = self.get_all_installations()
        if not installations:
            return []

        # Derive every installation's status in a single pass over the log
        # instead of re-reading the whole file once per installation.
        statuses: Dict[Any, str] = {}
        for entry in self._iter_entries():
            install_id = entry.get("install_id")
            if not install_id:
                continue
            statuses[install_id] = self._next_status(
                statuses.get(install_id, "in_progress"),
                entry.get("event")
            )

        return [
            info for info in installations
            if statuses.get(info["install_id"], "in_progress") == status
        ]

    @staticmethod
    def _next_status(current: str, event: Any) -> str:
        """Return the installation status after observing ``event``.

        Success/failure events set the status; any other event leaves
        ``current`` untouched.
        """
        if event == "installation_success":
            return "success"
        if event == "installation_failure":
            return "failure"
        return current

    def _write_entry(self, entry: AuditEntry) -> None:
        """Write audit entry to log file.

        Args:
            entry: AuditEntry to write
        """
        # Append to audit file (one JSON object per line)
        with open(self.audit_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry.to_dict()) + "\n")

    def _iter_entries(self) -> Iterator[Dict[str, Any]]:
        """Yield every well-formed entry (JSON object) from the audit log.

        Yields nothing when the log file does not exist. Blank lines, lines
        that are not valid JSON, and lines that are valid JSON but not an
        object (e.g. ``null`` or ``[]``) are skipped.
        """
        try:
            # errors="replace": undecodable bytes turn into U+FFFD instead of
            # raising, so a garbled line does not abort reading the rest.
            log = open(self.audit_file, "r", encoding="utf-8", errors="replace")
        except FileNotFoundError:
            return

        with log:
            for line in log:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    # Skip corrupted lines
                    continue
                if isinstance(entry, dict):
                    yield entry

    def _read_entries_for_install(self, install_id: str) -> List[Dict[str, Any]]:
        """Read all entries for a specific installation.

        Args:
            install_id: Installation ID

        Returns:
            List of entry dicts for this installation, in log order
            (empty if the log file does not exist)
        """
        return [
            entry for entry in self._iter_entries()
            if entry.get("install_id") == install_id
        ]

    def _validate_path(self, file_path: str) -> None:
        """Validate file path for security.

        Args:
            file_path: Relative file path

        Raises:
            ValueError: If path contains traversal or is absolute
        """
        # Check for path traversal.
        # NOTE: this is a substring match, so any path containing ".." is
        # rejected, including file names such as "a..b".
        if ".." in file_path:
            raise ValueError(f"Path traversal not allowed (invalid path): {file_path}")

        # Check for absolute paths (per the running platform's path rules)
        if Path(file_path).is_absolute():
            raise ValueError(f"Absolute paths not allowed (invalid path): {file_path}")