#!/usr/bin/env python3
"""
Tool Approval Audit - Audit Logging for MCP Auto-Approval

This module provides comprehensive audit logging for MCP tool approval decisions.
It implements security best practices for audit trail integrity:

1. JSON Lines format (one event per line for easy parsing)
2. Log injection prevention (CWE-117)
3. Sensitive data redaction (API keys, tokens, passwords)
4. Log rotation (10MB max size, keep 5 backups)
5. Thread-safe logging (concurrent agent tool calls)
6. Structured logging fields (timestamp, event, agent, tool, reason)

Security Features:
- CWE-117 prevention: Sanitize all user input before logging
- Sensitive data redaction: Automatically redact API keys, tokens, passwords
- Audit trail integrity: Immutable JSON lines format
- Log rotation: Prevent disk exhaustion
- Thread-safe: Safe for concurrent agent tool calls

Usage:
    from tool_approval_audit import ToolApprovalAuditor

    # Initialize auditor
    auditor = ToolApprovalAuditor()

    # Log approval
    auditor.log_approval(
        agent_name="researcher",
        tool="Bash",
        parameters={"command": "pytest tests/"},
        reason="Matches whitelist pattern: pytest*"
    )

    # Log denial
    auditor.log_denial(
        agent_name="researcher",
        tool="Bash",
        parameters={"command": "rm -rf /"},
        reason="Matches blacklist pattern: rm -rf*",
        security_risk=True
    )

    # Log circuit breaker trip
    auditor.log_circuit_breaker_trip(
        agent_name="researcher",
        denial_count=10,
        reason="Too many denials (10), disabling auto-approval"
    )

Date: 2025-11-15
Issue: #73 (MCP Auto-Approval for Subagent Tool Calls)
Agent: implementer
Phase: TDD Green (making tests pass)

See error-handling-patterns skill for exception hierarchy and error handling best practices.
"""

import json
import logging
import re
import threading
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from logging.handlers import RotatingFileHandler
from pathlib import Path
from typing import Dict, Any, List, Optional


# Default audit log file location: four ``.parent`` hops up from this file,
# then ``logs/tool_auto_approve_audit.log``.
DEFAULT_LOG_FILE = Path(__file__).parent.parent.parent.parent / "logs" / "tool_auto_approve_audit.log"

# A credential value: either a complete quoted string on one line (which may
# contain spaces) or a run of non-whitespace characters. Matching the whole
# quoted string keeps `password="two words"` from leaking its second word.
_SECRET_VALUE = r'''(?:"[^"\r\n]*"|'[^'\r\n]*'|\S+)'''

# Sensitive data patterns for redaction.
# Each item is (compiled regex, replacement); callers apply them in order.
SENSITIVE_PATTERNS = [
    # Header-style credentials. The optional scheme word (Bearer/Basic/...) is
    # consumed together with the credential that follows it; otherwise
    # "Authorization: Bearer <token>" would only redact the word "Bearer".
    (
        re.compile(
            r'(Authorization|Bearer|Token):\s*(?:(?:Bearer|Basic|Digest|Token)\s+)?\S+',
            re.IGNORECASE,
        ),
        r'\1: [REDACTED]',
    ),
    # key=value / key: value style credentials
    (re.compile(r'(api[_-]?key|apikey)\s*[=:]\s*' + _SECRET_VALUE, re.IGNORECASE), r'\1=[REDACTED]'),
    (re.compile(r'(password|passwd|pwd)\s*[=:]\s*' + _SECRET_VALUE, re.IGNORECASE), r'\1=[REDACTED]'),
    (re.compile(r'(secret|token)\s*[=:]\s*' + _SECRET_VALUE, re.IGNORECASE), r'\1=[REDACTED]'),
    # Bare tokens recognisable by their prefix
    (re.compile(r'sk-[a-zA-Z0-9]{20,}'), '[REDACTED_API_KEY]'),  # OpenAI-style API keys
    (re.compile(r'ghp_[a-zA-Z0-9]{36,}'), '[REDACTED_GITHUB_TOKEN]'),  # GitHub tokens
]

# Log injection prevention patterns (CWE-117)
# All control characters from \x00 to \x1f except \t (tab is visible)
INJECTION_CHARS = [chr(i) for i in range(0x00, 0x20) if i != 0x09]  # Exclude tab (0x09)

# Thread-safe logger singleton: the shared logger instance and the lock that
# guards its creation.
_audit_logger: Optional[logging.Logger] = None
_audit_logger_lock = threading.Lock()


@dataclass
class AuditLogEntry:
    """One record of the audit trail.

    Attributes:
        timestamp: ISO 8601 timestamp with timezone
        event: Event type (approval, denial, circuit_breaker_trip)
        agent: Name of the agent that requested the tool call
        tool: Tool name (Bash, Read, Write, etc.); omitted when not applicable
        reason: Human-readable explanation of the decision
        security_risk: True when a denial was made for security reasons
        parameters: Sanitized tool parameters
        denial_count: Number of denials (circuit breaker events only)
    """
    timestamp: str
    event: str
    agent: str
    tool: Optional[str] = None
    reason: Optional[str] = None
    security_risk: bool = False
    parameters: Optional[Dict[str, Any]] = None
    denial_count: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dict without unset (None) fields.

        Only top-level fields are filtered; None values nested inside
        ``parameters`` are kept.

        Returns:
            Dictionary representation in field declaration order
        """
        payload = asdict(self)
        unset_fields = [name for name, value in payload.items() if value is None]
        for name in unset_fields:
            del payload[name]
        return payload


class ToolApprovalAuditor:
    """Audit logger for MCP tool approval decisions.

    Every event is written as one JSON object per line (JSON Lines) through a
    ``RotatingFileHandler`` (10MB max, 5 backups). Before an event is written,
    string values are redacted against ``SENSITIVE_PATTERNS`` and passed
    through ``sanitize_log_input`` to prevent log injection (CWE-117).

    Thread-safe: logger setup is serialized with the module-level
    ``_audit_logger_lock``; the writes themselves go through the ``logging``
    handler.

    Example:
        >>> auditor = ToolApprovalAuditor()
        >>> auditor.log_approval("researcher", "Bash", {"command": "pytest"}, "Whitelisted")
    """

    # Rotation policy for the audit file.
    _MAX_BYTES = 10 * 1024 * 1024  # 10MB
    _BACKUP_COUNT = 5

    def __init__(self, log_file: Optional[Path] = None):
        """Initialize ToolApprovalAuditor.

        Args:
            log_file: Path to audit log file (default: logs/tool_auto_approve_audit.log).
                A plain string path is accepted as well.
        """
        # Coerce so that str paths work with the Path API used below.
        self.log_file = Path(log_file or DEFAULT_LOG_FILE)
        self._ensure_log_file_exists()
        self.logger = self._get_audit_logger()

    def _ensure_log_file_exists(self) -> None:
        """Create log file and parent directories if they don't exist."""
        self.log_file.parent.mkdir(parents=True, exist_ok=True)
        if not self.log_file.exists():
            self.log_file.touch()

    @staticmethod
    def _writes_to(logger: logging.Logger, target: Path) -> bool:
        """Return True if *logger* has a file handler bound to *target*."""
        for handler in logger.handlers:
            base = getattr(handler, "baseFilename", None)
            if base is not None and Path(base).resolve() == target:
                return True
        return False

    def _configure_logger(self, logger: logging.Logger) -> logging.Logger:
        """Attach a fresh rotating JSON-lines handler for ``self.log_file``.

        Handlers already on *logger* are closed before being dropped so their
        file descriptors are released.
        """
        for stale in list(logger.handlers):
            logger.removeHandler(stale)
            stale.close()

        logger.setLevel(logging.INFO)
        logger.propagate = False  # Don't propagate to root logger

        handler = RotatingFileHandler(
            self.log_file,
            maxBytes=self._MAX_BYTES,
            backupCount=self._BACKUP_COUNT,
            encoding='utf-8',
        )
        # JSON Lines format (no extra formatting)
        handler.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(handler)
        return logger

    def _get_audit_logger(self) -> logging.Logger:
        """Get or create the audit logger that writes to ``self.log_file``.

        The module-level singleton is created on first use (or after it has
        been reset to None) and reused by every auditor that targets the same
        file. An auditor created for a *different* file gets a dedicated child
        logger, so its events are never written to another auditor's file.

        Returns:
            Configured logger for audit events
        """
        global _audit_logger

        target = self.log_file.resolve()

        with _audit_logger_lock:
            if _audit_logger is None:
                _audit_logger = self._configure_logger(
                    logging.getLogger("tool_approval_audit")
                )
                return _audit_logger

            if self._writes_to(_audit_logger, target):
                return _audit_logger

            # The singleton is bound to another file: use one logger per file.
            dedicated = logging.getLogger(f"tool_approval_audit.{target}")
            if not self._writes_to(dedicated, target):
                self._configure_logger(dedicated)
            return dedicated

    def log_approval(
        self,
        agent_name: str,
        tool: str,
        parameters: Dict[str, Any],
        reason: str,
    ) -> None:
        """Log tool approval decision.

        Args:
            agent_name: Name of agent that requested tool call
            tool: Tool name (Bash, Read, Write, etc.)
            parameters: Tool parameters (will be sanitized)
            reason: Human-readable explanation of approval
        """
        self._write_entry(AuditLogEntry(
            timestamp=self._timestamp(),
            event="approval",
            agent=self._clean_text(agent_name),
            tool=self._clean_text(tool),
            reason=self._clean_text(reason),
            security_risk=False,
            parameters=self._sanitize_parameters(parameters),
        ))

    def log_denial(
        self,
        agent_name: str,
        tool: str,
        parameters: Dict[str, Any],
        reason: str,
        security_risk: bool = False,
    ) -> None:
        """Log tool denial decision.

        Args:
            agent_name: Name of agent that requested tool call
            tool: Tool name (Bash, Read, Write, etc.)
            parameters: Tool parameters (will be sanitized)
            reason: Human-readable explanation of denial
            security_risk: Whether denial is due to security concerns
        """
        self._write_entry(AuditLogEntry(
            timestamp=self._timestamp(),
            event="denial",
            agent=self._clean_text(agent_name),
            tool=self._clean_text(tool),
            reason=self._clean_text(reason),
            security_risk=security_risk,
            parameters=self._sanitize_parameters(parameters),
        ))

    def log_circuit_breaker_trip(
        self,
        agent_name: str,
        denial_count: int,
        reason: str,
    ) -> None:
        """Log circuit breaker trip event.

        Args:
            agent_name: Name of agent that triggered circuit breaker
            denial_count: Number of denials that triggered circuit breaker
            reason: Human-readable explanation
        """
        self._write_entry(AuditLogEntry(
            timestamp=self._timestamp(),
            event="circuit_breaker_trip",
            agent=self._clean_text(agent_name),
            reason=self._clean_text(reason),
            denial_count=denial_count,
        ))

    @staticmethod
    def _timestamp() -> str:
        """Return the current UTC time as an ISO 8601 string."""
        return datetime.now(timezone.utc).isoformat()

    @staticmethod
    def _clean_text(value: Any) -> Any:
        """Apply log-injection sanitization to strings; pass other values through."""
        return sanitize_log_input(value) if isinstance(value, str) else value

    def _write_entry(self, entry: AuditLogEntry) -> None:
        """Serialize *entry* as a single JSON line and hand it to the logger."""
        self.logger.info(json.dumps(entry.to_dict()))

    def _sanitize_parameters(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Sanitize parameters to remove sensitive data.

        Values are sanitized recursively, so secrets nested inside dicts,
        lists or tuples are redacted too. Keys are left unchanged.

        Args:
            parameters: Tool parameters dictionary (None is treated as empty)

        Returns:
            Sanitized parameters with sensitive data redacted
        """
        if not parameters:
            return {}
        return {key: self._sanitize_value(value) for key, value in parameters.items()}

    def _sanitize_value(self, value: Any) -> Any:
        """Return a redacted, injection-safe, JSON-serializable copy of *value*."""
        if isinstance(value, str):
            # Redact sensitive data
            for pattern, replacement in SENSITIVE_PATTERNS:
                value = pattern.sub(replacement, value)
            # Prevent log injection (CWE-117)
            return sanitize_log_input(value)

        if isinstance(value, dict):
            return {key: self._sanitize_value(item) for key, item in value.items()}

        if isinstance(value, (list, tuple)):
            return [self._sanitize_value(item) for item in value]

        if value is None or isinstance(value, (bool, int, float)):
            # Scalars cannot carry secrets or control characters
            return value

        # Anything else (Path, bytes, custom objects) is not JSON-serializable;
        # log its sanitized text form rather than failing the audit write.
        return self._sanitize_value(str(value))


# ANSI CSI escape sequence (ECMA-48): ESC '[', parameter bytes 0x30-0x3F,
# intermediate bytes 0x20-0x2F, then one final byte 0x40-0x7E.
_ANSI_CSI_PATTERN = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')


def sanitize_log_input(text: str) -> str:
    """Sanitize text input to prevent log injection (CWE-117).

    Replaces ANSI escape sequences and every character listed in
    ``INJECTION_CHARS`` (newlines, carriage returns, null bytes and the other
    control characters) with a space, so that a value cannot inject fake log
    entries or break log parsing. Tabs are preserved.

    Args:
        text: Text to sanitize

    Returns:
        Sanitized text with injection characters replaced by spaces
    """
    # Escape sequences must be stripped first: ESC (0x1b) is itself one of the
    # INJECTION_CHARS, so replacing control characters beforehand would leave
    # residue such as "[31m" behind.
    without_ansi = _ANSI_CSI_PATTERN.sub(' ', text)

    # Replace the remaining control characters in a single pass.
    control_to_space = dict.fromkeys(map(ord, INJECTION_CHARS), ' ')
    return without_ansi.translate(control_to_space)


def parse_audit_log(log_file: Optional[Path] = None) -> List[AuditLogEntry]:
    """Parse audit log file into structured entries.

    Blank lines, lines that are not valid JSON, JSON values that are not
    objects, and objects whose keys do not match ``AuditLogEntry`` are skipped.

    Args:
        log_file: Path to audit log file (default: logs/tool_auto_approve_audit.log).
            A plain string path is accepted as well.

    Returns:
        List of AuditLogEntry objects (empty if the file does not exist)
    """
    path = Path(log_file or DEFAULT_LOG_FILE)
    entries: List[AuditLogEntry] = []

    try:
        # Read with the encoding the audit handler writes (UTF-8).
        # errors='replace' keeps one corrupted byte from aborting the parse.
        handle = open(path, 'r', encoding='utf-8', errors='replace')
    except (FileNotFoundError, NotADirectoryError):
        return entries

    with handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Skip malformed lines
                continue

            if not isinstance(data, dict):
                # Valid JSON, but not an audit record
                continue

            try:
                entries.append(AuditLogEntry(**data))
            except TypeError:
                # Unknown or missing fields
                continue

    return entries


# Convenience functions for direct usage

# Shared auditor used by the module-level convenience functions.
# It is built on first use; the lock serializes that first construction.
_global_auditor: Optional[ToolApprovalAuditor] = None
_global_auditor_lock = threading.Lock()


def _get_global_auditor() -> ToolApprovalAuditor:
    """Return the shared auditor, constructing it on the first call.

    Returns:
        Global ToolApprovalAuditor instance
    """
    global _global_auditor

    with _global_auditor_lock:
        auditor = _global_auditor
        if auditor is None:
            # First caller: build the default auditor and publish it
            auditor = ToolApprovalAuditor()
            _global_auditor = auditor
    return auditor


def log_approval(
    agent_name: str,
    tool: str,
    parameters: Dict[str, Any],
    reason: str,
) -> None:
    """Record an approval through the shared module-level auditor.

    Args:
        agent_name: Name of the agent that requested the tool call
        tool: Name of the approved tool
        parameters: Parameters of the tool call
        reason: Why the call was approved
    """
    _get_global_auditor().log_approval(agent_name, tool, parameters, reason)


def log_denial(
    agent_name: str,
    tool: str,
    parameters: Dict[str, Any],
    reason: str,
    security_risk: bool = False,
) -> None:
    """Record a denial through the shared module-level auditor.

    Args:
        agent_name: Name of the agent that requested the tool call
        tool: Name of the denied tool
        parameters: Parameters of the tool call
        reason: Why the call was denied
        security_risk: True when the denial was made for security reasons
    """
    _get_global_auditor().log_denial(
        agent_name, tool, parameters, reason, security_risk
    )


def log_circuit_breaker_trip(
    agent_name: str,
    denial_count: int,
    reason: str,
) -> None:
    """Record a circuit breaker trip through the shared module-level auditor.

    Args:
        agent_name: Name of the agent that tripped the circuit breaker
        denial_count: Number of denials at the time of the trip
        reason: Why the circuit breaker tripped
    """
    _get_global_auditor().log_circuit_breaker_trip(
        agent_name, denial_count, reason
    )