#!/usr/bin/env python3
"""
Tool Approval Audit - Audit Logging for MCP Auto-Approval

This module provides audit logging for MCP tool approval decisions.
It implements security best practices for audit trail integrity:

1. JSON Lines format (one event per line for easy parsing)
2. Log injection prevention (CWE-117)
3. Sensitive data redaction (API keys, tokens, passwords)
4. Log rotation (10MB max size, keep 5 backups)
5. Thread-safe logging (concurrent agent tool calls)
6. Structured logging fields (timestamp, event, agent, tool, reason)

Security Features:
- CWE-117 prevention: Sanitize all user input before logging
- Sensitive data redaction: Automatically redact API keys, tokens, passwords
  (in string values, in nested containers, and under secret-named keys)
- Audit trail integrity: Append-only JSON lines format
- Log rotation: Prevent disk exhaustion
- Thread-safe: Safe for concurrent agent tool calls

Usage:
    from tool_approval_audit import ToolApprovalAuditor

    # Initialize auditor
    auditor = ToolApprovalAuditor()

    # Log approval
    auditor.log_approval(
        agent_name="researcher",
        tool="Bash",
        parameters={"command": "pytest tests/"},
        reason="Matches whitelist pattern: pytest*"
    )

    # Log denial
    auditor.log_denial(
        agent_name="researcher",
        tool="Bash",
        parameters={"command": "rm -rf /"},
        reason="Matches blacklist pattern: rm -rf*",
        security_risk=True
    )

    # Log circuit breaker trip
    auditor.log_circuit_breaker_trip(
        agent_name="researcher",
        denial_count=10,
        reason="Too many denials (10), disabling auto-approval"
    )

Date: 2025-11-15
Issue: #73 (MCP Auto-Approval for Subagent Tool Calls)
Agent: implementer
Phase: TDD Green (making tests pass)

See error-handling-patterns skill for exception hierarchy and error handling best practices.
"""

import json
import logging
import re
import threading
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from logging.handlers import RotatingFileHandler
from pathlib import Path
from typing import Dict, Any, List, Optional, Union

# Default audit log file location
DEFAULT_LOG_FILE = Path(__file__).parent.parent.parent.parent / "logs" / "tool_auto_approve_audit.log"

# Log rotation settings (10MB max size, keep 5 backups)
MAX_LOG_BYTES = 10 * 1024 * 1024
LOG_BACKUP_COUNT = 5

# Placeholder written in place of a value stored under a secret-named key
REDACTED = '[REDACTED]'

# Sensitive data patterns for redaction (applied in order)
SENSITIVE_PATTERNS = [
    # "Authorization: <scheme> <credential>" - the optional scheme word must be
    # consumed together with the credential, otherwise only the scheme is
    # replaced and the credential itself stays in the log.
    (
        re.compile(
            r'(Authorization)\s*:\s*(?:(?:Bearer|Basic|Token|Digest)\s+)?[^\s\'"]+',
            re.IGNORECASE,
        ),
        r'\1: [REDACTED]',
    ),
    (re.compile(r'(Bearer|Token):\s*\S+', re.IGNORECASE), r'\1: [REDACTED]'),
    (re.compile(r'(api[_-]?key|apikey)\s*[=:]\s*[\'"]?\S+', re.IGNORECASE), r'\1=[REDACTED]'),
    (re.compile(r'(password|passwd|pwd)\s*[=:]\s*[\'"]?\S+', re.IGNORECASE), r'\1=[REDACTED]'),
    (re.compile(r'(secret|token)\s*[=:]\s*[\'"]?\S+', re.IGNORECASE), r'\1=[REDACTED]'),
    # OpenAI-style API keys, including "sk-proj-..." keys that contain '-'/'_'.
    # The lookbehind keeps ordinary words such as "task-..." from matching.
    (re.compile(r'(?<![A-Za-z0-9])sk-[A-Za-z0-9_-]{20,}'), '[REDACTED_API_KEY]'),
    # GitHub tokens (ghp_, gho_, ghu_, ghs_, ghr_ and fine-grained github_pat_)
    (re.compile(r'gh[pousr]_[A-Za-z0-9]{36,}'), '[REDACTED_GITHUB_TOKEN]'),
    (re.compile(r'github_pat_[A-Za-z0-9_]{22,}'), '[REDACTED_GITHUB_TOKEN]'),
]

# Parameter names whose string value is a secret regardless of its content
# (e.g. {"api_key": "abcd"}). Anchored at the end so that "max_tokens" or
# "token_count" are not treated as secrets.
SENSITIVE_KEY_PATTERN = re.compile(
    r'(?:api[_-]?key|password|passwd|pwd|secret|token|authorization)$',
    re.IGNORECASE,
)

# Log injection prevention patterns (CWE-117)
# All control characters from \x00 to \x1f except \t (tab is visible)
INJECTION_CHARS = [chr(i) for i in range(0x00, 0x20) if i != 0x09]  # Exclude tab (0x09)

# One-pass translation table built from INJECTION_CHARS
_INJECTION_TABLE = str.maketrans({char: ' ' for char in INJECTION_CHARS})

# ANSI CSI escape sequences: ESC [ parameter bytes, intermediate bytes, final byte
_ANSI_ESCAPE_PATTERN = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')

# Nesting limit for parameter sanitization (also guards against cyclic data)
_MAX_SANITIZE_DEPTH = 10

# Thread-safe logger registry.
# _audit_loggers maps a resolved log file path to its configured logger so that
# auditors created for different files do not write into each other's file.
# _audit_logger is kept for backward compatibility and always refers to the
# most recently requested logger.
_AUDIT_LOGGER_NAME = "tool_approval_audit"
_audit_logger: Optional[logging.Logger] = None
_audit_loggers: Dict[str, logging.Logger] = {}
_audit_logger_lock = threading.Lock()


@dataclass
class AuditLogEntry:
    """Structured audit log entry.

    Attributes:
        timestamp: ISO 8601 timestamp with timezone
        event: Event type (approval, denial, circuit_breaker_trip)
        agent: Agent name that requested tool call
        tool: Tool name (Bash, Read, Write, etc.)
        reason: Human-readable explanation of decision
        security_risk: Whether denial is due to security concerns
        parameters: Sanitized tool parameters
        denial_count: Number of denials (for circuit breaker events)
    """

    timestamp: str
    event: str
    agent: str
    tool: Optional[str] = None
    reason: Optional[str] = None
    security_risk: bool = False
    parameters: Optional[Dict[str, Any]] = None
    denial_count: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary, excluding None values.

        Returns:
            Dictionary representation (``False`` and empty containers are kept;
            only fields that are ``None`` are dropped)
        """
        return {k: v for k, v in asdict(self).items() if v is not None}


def _utc_timestamp() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    return datetime.now(timezone.utc).isoformat()


def _redact_sensitive(text: str) -> str:
    """Apply every SENSITIVE_PATTERNS substitution to *text*, in order."""
    for pattern, replacement in SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text


def _sanitize_mapping(mapping: Dict[Any, Any], depth: int) -> Dict[Any, Any]:
    """Return a sanitized copy of *mapping* (see ``_sanitize_value``)."""
    sanitized: Dict[Any, Any] = {}
    for key, value in mapping.items():
        # json.dumps accepts str/int/float/bool/None keys only
        if key is None or isinstance(key, (str, bool, int, float)):
            safe_key = key
        else:
            safe_key = str(key)

        if (
            isinstance(key, str)
            and isinstance(value, str)
            and value
            and SENSITIVE_KEY_PATTERN.search(key)
        ):
            # The key name says the value is a secret; no pattern inside the
            # value would catch a bare credential such as "abcd1234".
            sanitized[safe_key] = REDACTED
        else:
            sanitized[safe_key] = _sanitize_value(value, depth + 1)
    return sanitized


def _sanitize_value(value: Any, depth: int = 0) -> Any:
    """Return a redacted, injection-safe, JSON-serializable copy of *value*.

    Strings are redacted and stripped of control characters. Numbers, booleans
    and None pass through. Dicts and sequences are sanitized recursively, and
    any other object is converted with ``str()`` so that ``json.dumps`` cannot
    fail on it.
    """
    if isinstance(value, str):
        return sanitize_log_input(_redact_sensitive(value))
    if value is None or isinstance(value, (bool, int, float)):
        return value
    if depth >= _MAX_SANITIZE_DEPTH:
        return '[TRUNCATED]'
    if isinstance(value, dict):
        return _sanitize_mapping(value, depth)
    if isinstance(value, (list, tuple, set, frozenset)):
        return [_sanitize_value(item, depth + 1) for item in value]
    return sanitize_log_input(_redact_sensitive(str(value)))


class ToolApprovalAuditor:
    """Audit logger for MCP tool approval decisions.

    This class provides thread-safe audit logging with:
    - JSON Lines format (one event per line)
    - Log injection prevention (CWE-117)
    - Sensitive data redaction
    - Log rotation (10MB max, 5 backups)

    Thread-safe: logger creation is guarded by a module-level threading.Lock;
    record emission relies on the locking done by the logging handlers.

    Example:
        >>> auditor = ToolApprovalAuditor()
        >>> auditor.log_approval("researcher", "Bash", {"command": "pytest"}, "Whitelisted")
    """

    def __init__(self, log_file: Optional[Union[str, Path]] = None):
        """Initialize ToolApprovalAuditor.

        Args:
            log_file: Path to audit log file (default: logs/tool_auto_approve_audit.log).
                A plain string is accepted and converted to a Path.
        """
        self.log_file = Path(log_file) if log_file else DEFAULT_LOG_FILE
        self._ensure_log_file_exists()
        self.logger = self._get_audit_logger()

    def _ensure_log_file_exists(self) -> None:
        """Create log file and parent directories if they don't exist."""
        self.log_file.parent.mkdir(parents=True, exist_ok=True)
        if not self.log_file.exists():
            self.log_file.touch()

    def _build_handler(self) -> RotatingFileHandler:
        """Create the rotating file handler for this auditor's log file."""
        handler = RotatingFileHandler(
            self.log_file,
            maxBytes=MAX_LOG_BYTES,
            backupCount=LOG_BACKUP_COUNT,
            encoding='utf-8',
        )
        # JSON Lines format (no extra formatting)
        handler.setFormatter(logging.Formatter('%(message)s'))
        return handler

    def _get_audit_logger(self) -> logging.Logger:
        """Get or create the thread-safe audit logger for ``self.log_file``.

        One logger is kept per resolved log file path. The first file
        configured uses the logger name "tool_approval_audit"; further files
        get numbered child names, so an auditor created with a different
        ``log_file`` no longer writes into the first auditor's file.

        Returns:
            Configured logger for audit events
        """
        global _audit_logger

        key = str(Path(self.log_file).resolve())
        with _audit_logger_lock:
            logger = _audit_loggers.get(key)
            if logger is None:
                if _audit_loggers:
                    name = f"{_AUDIT_LOGGER_NAME}.{len(_audit_loggers)}"
                else:
                    name = _AUDIT_LOGGER_NAME
                logger = logging.getLogger(name)
                logger.setLevel(logging.INFO)
                logger.propagate = False  # Don't propagate to parent/root logger

                # Remove (and close) handlers left over from an earlier
                # configuration, so no file descriptor is leaked.
                for stale in list(logger.handlers):
                    logger.removeHandler(stale)
                    stale.close()

                _audit_loggers[key] = logger

            # Also covers a cached logger whose handlers were removed externally
            if not logger.handlers:
                logger.addHandler(self._build_handler())

            _audit_logger = logger
            return logger

    def _write_entry(self, entry: AuditLogEntry) -> None:
        """Serialize *entry* as one JSON line and hand it to the logger."""
        # default=str is a safety net only: sanitized parameters are already
        # JSON-safe, but an audit write must not fail on an unexpected type.
        self.logger.info(json.dumps(entry.to_dict(), default=str))

    def log_approval(
        self,
        agent_name: str,
        tool: str,
        parameters: Dict[str, Any],
        reason: str,
    ) -> None:
        """Log tool approval decision.

        Args:
            agent_name: Name of agent that requested tool call
            tool: Tool name (Bash, Read, Write, etc.)
            parameters: Tool parameters (will be sanitized)
            reason: Human-readable explanation of approval
        """
        self._write_entry(
            AuditLogEntry(
                timestamp=_utc_timestamp(),
                event="approval",
                agent=sanitize_log_input(agent_name),
                tool=sanitize_log_input(tool) if tool is not None else None,
                reason=sanitize_log_input(reason),
                security_risk=False,
                parameters=self._sanitize_parameters(parameters),
            )
        )

    def log_denial(
        self,
        agent_name: str,
        tool: str,
        parameters: Dict[str, Any],
        reason: str,
        security_risk: bool = False,
    ) -> None:
        """Log tool denial decision.

        Args:
            agent_name: Name of agent that requested tool call
            tool: Tool name (Bash, Read, Write, etc.)
            parameters: Tool parameters (will be sanitized)
            reason: Human-readable explanation of denial
            security_risk: Whether denial is due to security concerns
        """
        self._write_entry(
            AuditLogEntry(
                timestamp=_utc_timestamp(),
                event="denial",
                agent=sanitize_log_input(agent_name),
                tool=sanitize_log_input(tool) if tool is not None else None,
                reason=sanitize_log_input(reason),
                security_risk=security_risk,
                parameters=self._sanitize_parameters(parameters),
            )
        )

    def log_circuit_breaker_trip(
        self,
        agent_name: str,
        denial_count: int,
        reason: str,
    ) -> None:
        """Log circuit breaker trip event.

        Args:
            agent_name: Name of agent that triggered circuit breaker
            denial_count: Number of denials that triggered circuit breaker
            reason: Human-readable explanation
        """
        self._write_entry(
            AuditLogEntry(
                timestamp=_utc_timestamp(),
                event="circuit_breaker_trip",
                agent=sanitize_log_input(agent_name),
                reason=sanitize_log_input(reason),
                denial_count=denial_count,
            )
        )

    def _sanitize_parameters(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Sanitize parameters to remove sensitive data.

        String values are redacted and cleaned of control characters at every
        nesting level. String values stored under a secret-named key (for
        example ``api_key`` or ``password``) are replaced entirely. Values that
        JSON cannot represent are converted to their string form.

        Args:
            parameters: Tool parameters dictionary (``None`` is treated as empty)

        Returns:
            Sanitized copy of the parameters; the input is not modified
        """
        if parameters is None:
            return {}
        return _sanitize_mapping(parameters, 0)


def sanitize_log_input(text: str) -> str:
    """Sanitize text input to prevent log injection (CWE-117).

    Replaces ANSI escape sequences and control characters (newlines, carriage
    returns, null bytes, stray ESC, ...) with spaces so they cannot be used to
    inject fake log entries or break log parsing. Tabs are preserved.

    Args:
        text: Text to sanitize. ``None`` yields an empty string; any other
            non-string value is converted with ``str()`` first.

    Returns:
        Sanitized text with injection characters replaced by spaces
    """
    if text is None:
        return ''
    if not isinstance(text, str):
        text = str(text)

    # ANSI sequences must go first: they start with ESC (0x1b), which is itself
    # one of the control characters replaced below. Doing it the other way
    # round leaves the "[31m" part of every sequence in the log.
    without_ansi = _ANSI_ESCAPE_PATTERN.sub(' ', text)
    return without_ansi.translate(_INJECTION_TABLE)


def parse_audit_log(log_file: Optional[Union[str, Path]] = None) -> List[AuditLogEntry]:
    """Parse audit log file into structured entries.

    Blank lines, lines that are not valid JSON objects, and objects whose keys
    do not match the AuditLogEntry fields are skipped.

    Args:
        log_file: Path to audit log file (default: logs/tool_auto_approve_audit.log)

    Returns:
        List of AuditLogEntry objects (empty if the file does not exist)
    """
    log_path = Path(log_file) if log_file else DEFAULT_LOG_FILE

    if not log_path.exists():
        return []

    entries: List[AuditLogEntry] = []
    # The writer uses UTF-8; errors='replace' keeps a single corrupt byte from
    # aborting the whole parse.
    with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue  # Skip malformed lines

            if not isinstance(data, dict):
                continue

            try:
                entries.append(AuditLogEntry(**data))
            except TypeError:
                continue  # Unknown or missing fields

    return entries


# Convenience functions for direct usage

# Global auditor instance (lazy initialization)
_global_auditor: Optional[ToolApprovalAuditor] = None
_global_auditor_lock = threading.Lock()


def _get_global_auditor() -> ToolApprovalAuditor:
    """Get or create global auditor instance.

    Returns:
        Global ToolApprovalAuditor instance (writes to DEFAULT_LOG_FILE)
    """
    global _global_auditor

    with _global_auditor_lock:
        if _global_auditor is None:
            _global_auditor = ToolApprovalAuditor()
        return _global_auditor


def log_approval(
    agent_name: str,
    tool: str,
    parameters: Dict[str, Any],
    reason: str,
) -> None:
    """Log approval decision (convenience function).

    Args:
        agent_name: Agent name
        tool: Tool name
        parameters: Tool parameters
        reason: Approval reason
    """
    auditor = _get_global_auditor()
    auditor.log_approval(agent_name, tool, parameters, reason)


def log_denial(
    agent_name: str,
    tool: str,
    parameters: Dict[str, Any],
    reason: str,
    security_risk: bool = False,
) -> None:
    """Log denial decision (convenience function).

    Args:
        agent_name: Agent name
        tool: Tool name
        parameters: Tool parameters
        reason: Denial reason
        security_risk: Whether denial is due to security
    """
    auditor = _get_global_auditor()
    auditor.log_denial(agent_name, tool, parameters, reason, security_risk)


def log_circuit_breaker_trip(
    agent_name: str,
    denial_count: int,
    reason: str,
) -> None:
    """Log circuit breaker trip (convenience function).

    Args:
        agent_name: Agent name
        denial_count: Number of denials
        reason: Trip reason
    """
    auditor = _get_global_auditor()
    auditor.log_circuit_breaker_trip(agent_name, denial_count, reason)