TradingAgents/.claude/lib/tool_approval_audit.py

441 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Tool Approval Audit - Audit Logging for MCP Auto-Approval
This module provides comprehensive audit logging for MCP tool approval decisions.
It implements security best practices for audit trail integrity:
1. JSON Lines format (one event per line for easy parsing)
2. Log injection prevention (CWE-117)
3. Sensitive data redaction (API keys, tokens, passwords)
4. Log rotation (10MB max size, keep 5 backups)
5. Thread-safe logging (concurrent agent tool calls)
6. Structured logging fields (timestamp, event, agent, tool, reason)
Security Features:
- CWE-117 prevention: Sanitize all user input before logging
- Sensitive data redaction: Automatically redact API keys, tokens, passwords
- Audit trail integrity: Append-only JSON Lines format (one self-contained event per line)
- Log rotation: Prevent disk exhaustion
- Thread-safe: Safe for concurrent agent tool calls
Usage:
from tool_approval_audit import ToolApprovalAuditor
# Initialize auditor
auditor = ToolApprovalAuditor()
# Log approval
auditor.log_approval(
agent_name="researcher",
tool="Bash",
parameters={"command": "pytest tests/"},
reason="Matches whitelist pattern: pytest*"
)
# Log denial
auditor.log_denial(
agent_name="researcher",
tool="Bash",
parameters={"command": "rm -rf /"},
reason="Matches blacklist pattern: rm -rf*",
security_risk=True
)
# Log circuit breaker trip
auditor.log_circuit_breaker_trip(
agent_name="researcher",
denial_count=10,
reason="Too many denials (10), disabling auto-approval"
)
Date: 2025-11-15
Issue: #73 (MCP Auto-Approval for Subagent Tool Calls)
Agent: implementer
Phase: TDD Green (making tests pass)
See error-handling-patterns skill for exception hierarchy and error handling best practices.
"""
import json
import logging
import os
import re
import threading
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from logging.handlers import RotatingFileHandler
from pathlib import Path
from typing import Dict, Any, List, Optional
# Default audit log file location.
# NOTE(review): this resolves four directory levels above this file before
# appending logs/ - confirm that is the intended location for this checkout.
DEFAULT_LOG_FILE = Path(__file__).parent.parent.parent.parent / "logs" / "tool_auto_approve_audit.log"

# Sensitive data patterns for redaction.
# Each entry is a (compiled pattern, replacement) pair; the pairs are applied
# in order to every string that is redacted.
SENSITIVE_PATTERNS = [
    # "Authorization: <scheme> <credential>": the optional scheme word must be
    # consumed as part of the match, otherwise only "Bearer"/"Basic" would be
    # replaced and the credential that follows it would stay in the log.
    (
        re.compile(r'(Authorization):\s*(?:(?:Bearer|Basic|Digest|Token)\s+)?\S+', re.IGNORECASE),
        r'\1: [REDACTED]',
    ),
    # Bare "Bearer: <value>" / "Token: <value>" forms.
    (re.compile(r'(Bearer|Token):\s*\S+', re.IGNORECASE), r'\1: [REDACTED]'),
    (re.compile(r'(api[_-]?key|apikey)\s*[=:]\s*[\'"]?\S+', re.IGNORECASE), r'\1=[REDACTED]'),
    (re.compile(r'(password|passwd|pwd)\s*[=:]\s*[\'"]?\S+', re.IGNORECASE), r'\1=[REDACTED]'),
    (re.compile(r'(secret|token)\s*[=:]\s*[\'"]?\S+', re.IGNORECASE), r'\1=[REDACTED]'),
    (re.compile(r'sk-[a-zA-Z0-9]{20,}'), '[REDACTED_API_KEY]'),  # OpenAI-style API keys
    (re.compile(r'ghp_[a-zA-Z0-9]{36,}'), '[REDACTED_GITHUB_TOKEN]'),  # GitHub tokens
]

# Log injection prevention patterns (CWE-117)
# All control characters from \x00 to \x1f except \t (tab is visible).
# Note that this includes ESC (\x1b), the first byte of ANSI escape sequences.
INJECTION_CHARS = [chr(i) for i in range(0x00, 0x20) if i != 0x09]  # Exclude tab (0x09)

# Thread-safe logger singleton: the shared logger is created lazily, and
# _audit_logger_lock serializes that creation.
_audit_logger: Optional[logging.Logger] = None
_audit_logger_lock = threading.Lock()
@dataclass
class AuditLogEntry:
    """One structured record of the audit log.

    Attributes:
        timestamp: ISO 8601 timestamp with timezone
        event: Event type (approval, denial, circuit_breaker_trip)
        agent: Name of the agent that requested the tool call
        tool: Tool name (Bash, Read, Write, etc.)
        reason: Human-readable explanation of the decision
        security_risk: Whether a denial is due to security concerns
        parameters: Sanitized tool parameters
        denial_count: Number of denials (for circuit breaker events)
    """

    timestamp: str
    event: str
    agent: str
    tool: Optional[str] = None
    reason: Optional[str] = None
    security_risk: bool = False
    parameters: Optional[Dict[str, Any]] = None
    denial_count: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a dictionary without the fields that are None.

        Returns:
            Dictionary representation, in field declaration order
        """
        payload = asdict(self)
        # Collect the names first so the dict is not mutated while iterating.
        unset_fields = [name for name, value in payload.items() if value is None]
        for name in unset_fields:
            del payload[name]
        return payload
class ToolApprovalAuditor:
    """Audit logger for MCP tool approval decisions.

    Every event is written as one JSON object per line (JSON Lines) through a
    ``RotatingFileHandler`` (10MB per file, 5 backups). String parameter
    values, including those nested in dicts, lists and tuples, are redacted
    with SENSITIVE_PATTERNS and passed through ``sanitize_log_input`` before
    they are written.

    Logger creation is serialized with the module-level lock. Auditors that
    point at the same file share one logger (and therefore one file handler);
    an auditor given a different ``log_file`` gets a logger of its own instead
    of silently writing to the file of whichever auditor was created first.

    Example:
        >>> auditor = ToolApprovalAuditor()
        >>> auditor.log_approval("researcher", "Bash", {"command": "pytest"}, "Whitelisted")
    """

    _LOGGER_NAME = "tool_approval_audit"
    _MAX_BYTES = 10 * 1024 * 1024  # 10MB per log file
    _BACKUP_COUNT = 5

    def __init__(self, log_file: Optional[Path] = None):
        """Initialize ToolApprovalAuditor.

        Args:
            log_file: Path to audit log file (default: DEFAULT_LOG_FILE).
                A plain string path is accepted as well.
        """
        # Path() keeps Path arguments as they are and upgrades str arguments,
        # so the ``.parent`` / ``.exists()`` calls below always work.
        self.log_file = Path(log_file or DEFAULT_LOG_FILE)
        self._ensure_log_file_exists()
        self.logger = self._get_audit_logger()

    def _ensure_log_file_exists(self) -> None:
        """Create log file and parent directories if they don't exist."""
        self.log_file.parent.mkdir(parents=True, exist_ok=True)
        if not self.log_file.exists():
            self.log_file.touch()

    def _get_audit_logger(self) -> logging.Logger:
        """Get or create the rotating audit logger for ``self.log_file``.

        The first auditor created in the process configures the shared
        module-level logger. Later auditors reuse it when they target the same
        file; an auditor targeting another file gets a dedicated logger.

        Returns:
            Configured logger that writes to ``self.log_file``
        """
        global _audit_logger
        # RotatingFileHandler stores its target as an absolute path in
        # ``baseFilename``; normalize the same way so the two compare equal.
        target = os.path.abspath(os.fspath(self.log_file))
        with _audit_logger_lock:
            if _audit_logger is None:
                _audit_logger = self._configure_logger(
                    logging.getLogger(self._LOGGER_NAME), target
                )
                return _audit_logger
            if self._targets_file(_audit_logger, target):
                return _audit_logger
            # The shared logger belongs to another file: use one logger per
            # file so existing auditors keep writing where they were told to.
            dedicated = logging.getLogger(f"{self._LOGGER_NAME}[{target}]")
            if not self._targets_file(dedicated, target):
                self._configure_logger(dedicated, target)
            return dedicated

    @staticmethod
    def _targets_file(logger: logging.Logger, target: str) -> bool:
        """Return True if one of the logger's handlers writes to ``target``."""
        return any(
            getattr(handler, "baseFilename", None) == target
            for handler in logger.handlers
        )

    @classmethod
    def _configure_logger(cls, logger: logging.Logger, target: str) -> logging.Logger:
        """Attach a fresh rotating JSON Lines handler for ``target`` to ``logger``."""
        logger.setLevel(logging.INFO)
        logger.propagate = False  # Don't propagate to root logger
        # Close handlers that are being replaced so their files are released.
        for stale_handler in list(logger.handlers):
            logger.removeHandler(stale_handler)
            stale_handler.close()
        handler = RotatingFileHandler(
            target,
            maxBytes=cls._MAX_BYTES,
            backupCount=cls._BACKUP_COUNT,
            encoding='utf-8',
        )
        # JSON Lines format (no extra formatting)
        handler.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(handler)
        return logger

    def log_approval(
        self,
        agent_name: str,
        tool: str,
        parameters: Dict[str, Any],
        reason: str,
    ) -> None:
        """Log tool approval decision.

        Args:
            agent_name: Name of agent that requested tool call
            tool: Tool name (Bash, Read, Write, etc.)
            parameters: Tool parameters (will be sanitized)
            reason: Human-readable explanation of approval
        """
        self._write_entry(AuditLogEntry(
            timestamp=datetime.now(timezone.utc).isoformat(),
            event="approval",
            agent=agent_name,
            tool=tool,
            reason=sanitize_log_input(reason),
            security_risk=False,
            parameters=self._sanitize_parameters(parameters),
        ))

    def log_denial(
        self,
        agent_name: str,
        tool: str,
        parameters: Dict[str, Any],
        reason: str,
        security_risk: bool = False,
    ) -> None:
        """Log tool denial decision.

        Args:
            agent_name: Name of agent that requested tool call
            tool: Tool name (Bash, Read, Write, etc.)
            parameters: Tool parameters (will be sanitized)
            reason: Human-readable explanation of denial
            security_risk: Whether denial is due to security concerns
        """
        self._write_entry(AuditLogEntry(
            timestamp=datetime.now(timezone.utc).isoformat(),
            event="denial",
            agent=agent_name,
            tool=tool,
            reason=sanitize_log_input(reason),
            security_risk=security_risk,
            parameters=self._sanitize_parameters(parameters),
        ))

    def log_circuit_breaker_trip(
        self,
        agent_name: str,
        denial_count: int,
        reason: str,
    ) -> None:
        """Log circuit breaker trip event.

        Args:
            agent_name: Name of agent that triggered circuit breaker
            denial_count: Number of denials that triggered circuit breaker
            reason: Human-readable explanation
        """
        self._write_entry(AuditLogEntry(
            timestamp=datetime.now(timezone.utc).isoformat(),
            event="circuit_breaker_trip",
            agent=agent_name,
            reason=sanitize_log_input(reason),
            denial_count=denial_count,
        ))

    def _write_entry(self, entry: AuditLogEntry) -> None:
        """Serialize ``entry`` as one JSON line and hand it to the logger.

        Raises:
            TypeError: If the entry holds a value ``json.dumps`` cannot serialize.
        """
        self.logger.info(json.dumps(entry.to_dict()))

    def _sanitize_parameters(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Sanitize parameters to remove sensitive data.

        Args:
            parameters: Tool parameters dictionary (None is treated as empty)

        Returns:
            New dictionary with every string value, at any nesting depth,
            redacted and stripped of log-injection characters
        """
        if not parameters:
            return {}
        return {key: self._sanitize_value(value) for key, value in parameters.items()}

    def _sanitize_value(self, value: Any) -> Any:
        """Redact and clean one parameter value, recursing into containers."""
        if isinstance(value, str):
            # Redact first: the patterns rely on \S+ runs, which replacing
            # control characters with spaces could split and partially expose.
            for pattern, replacement in SENSITIVE_PATTERNS:
                value = pattern.sub(replacement, value)
            # Prevent log injection (CWE-117)
            return sanitize_log_input(value)
        if isinstance(value, dict):
            return {key: self._sanitize_value(item) for key, item in value.items()}
        if isinstance(value, list):
            return [self._sanitize_value(item) for item in value]
        if isinstance(value, tuple):
            return tuple(self._sanitize_value(item) for item in value)
        # Remaining values (int, bool, None, ...) are passed through unchanged.
        return value
def sanitize_log_input(text: str) -> str:
    """Sanitize text input to prevent log injection (CWE-117).

    Replaces ANSI escape sequences and every character in INJECTION_CHARS
    (newlines, carriage returns, null bytes and the other control characters
    below 0x20) with a space, so a value cannot inject fake log entries or
    break log parsing. Tabs are kept.

    Args:
        text: Text to sanitize

    Returns:
        Sanitized text; each ANSI escape sequence and each remaining control
        character is replaced by a single space
    """
    # Strip whole ANSI sequences (ESC [ parameters command-letter) FIRST.
    # ESC (0x1b) is itself one of INJECTION_CHARS, so replacing control
    # characters first would turn "\x1b[31m" into " [31m" and the sequence
    # could no longer be recognized.
    without_ansi = re.sub(r'\x1b\[[0-9;]*[a-zA-Z]', ' ', text)
    # One translate() pass instead of one str.replace() per control character.
    control_to_space = dict.fromkeys(map(ord, INJECTION_CHARS), ' ')
    return without_ansi.translate(control_to_space)
def parse_audit_log(log_file: Optional[Path] = None) -> "List[AuditLogEntry]":
    """Parse audit log file into structured entries.

    Blank lines are ignored. Lines that are not valid UTF-8, are not valid
    JSON, or do not match the AuditLogEntry fields are skipped, so one damaged
    line does not prevent the rest of the log from being read.

    Args:
        log_file: Path to audit log file (default: DEFAULT_LOG_FILE)

    Returns:
        List of AuditLogEntry objects in file order; empty if the file does
        not exist
    """
    path = Path(log_file or DEFAULT_LOG_FILE)
    try:
        # Binary mode: each line is decoded on its own below, so a single
        # undecodable line is skipped instead of aborting the whole read.
        handle = open(path, "rb")
    except (FileNotFoundError, NotADirectoryError):
        return []

    entries = []
    with handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                continue
            try:
                # The audit handler writes UTF-8, so decode with that codec
                # rather than the platform default.
                record = json.loads(stripped.decode("utf-8"))
                entries.append(AuditLogEntry(**record))
            except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
                # Skip malformed lines
                continue
    return entries
# Convenience functions for direct usage
# The module-level log_* functions below share one auditor instance.
# It starts as None and is created on first use (lazy initialization);
# _global_auditor_lock guards that creation.
_global_auditor: Optional[ToolApprovalAuditor] = None
_global_auditor_lock = threading.Lock()
def _get_global_auditor() -> ToolApprovalAuditor:
    """Return the shared auditor, creating it on the first call.

    Returns:
        Global ToolApprovalAuditor instance
    """
    global _global_auditor
    with _global_auditor_lock:
        shared = _global_auditor
        if shared is None:
            # First caller builds the instance while holding the lock.
            shared = _global_auditor = ToolApprovalAuditor()
        return shared
def log_approval(
    agent_name: str,
    tool: str,
    parameters: Dict[str, Any],
    reason: str,
) -> None:
    """Record an approval through the shared global auditor.

    Args:
        agent_name: Agent name
        tool: Tool name
        parameters: Tool parameters
        reason: Approval reason
    """
    _get_global_auditor().log_approval(
        agent_name=agent_name,
        tool=tool,
        parameters=parameters,
        reason=reason,
    )
def log_denial(
    agent_name: str,
    tool: str,
    parameters: Dict[str, Any],
    reason: str,
    security_risk: bool = False,
) -> None:
    """Record a denial through the shared global auditor.

    Args:
        agent_name: Agent name
        tool: Tool name
        parameters: Tool parameters
        reason: Denial reason
        security_risk: Whether denial is due to security
    """
    _get_global_auditor().log_denial(
        agent_name=agent_name,
        tool=tool,
        parameters=parameters,
        reason=reason,
        security_risk=security_risk,
    )
def log_circuit_breaker_trip(
    agent_name: str,
    denial_count: int,
    reason: str,
) -> None:
    """Record a circuit breaker trip through the shared global auditor.

    Args:
        agent_name: Agent name
        denial_count: Number of denials
        reason: Trip reason
    """
    _get_global_auditor().log_circuit_breaker_trip(
        agent_name=agent_name,
        denial_count=denial_count,
        reason=reason,
    )