#!/usr/bin/env python3
"""
Performance Profiler - Track and aggregate agent execution timing
This module provides timing infrastructure for measuring agent performance
in the /auto-implement workflow. It captures execution duration, logs metrics
to JSON, and calculates aggregate statistics (min, max, avg, p95) per agent.
Features:
- Context manager interface for easy timer wrapping
- JSON logging to logs/performance_metrics.json (newline-delimited)
- Aggregate metrics calculation (min, max, avg, p95)
- Minimal overhead (<5% profiling cost)
- Thread-safe file writes
- ISO 8601 timestamps
Usage:
from performance_profiler import PerformanceTimer, calculate_aggregate_metrics
# Time an agent execution
with PerformanceTimer("researcher", "Add user auth", log_to_file=True) as timer:
# Execute agent work
result = agent.execute()
print(f"Duration: {timer.duration:.2f}s")
# Calculate aggregate metrics
durations = [10.0, 20.0, 30.0, 40.0, 50.0]
metrics = calculate_aggregate_metrics(durations)
print(f"Average: {metrics['avg']:.2f}s, P95: {metrics['p95']:.2f}s")
Date: 2025-11-08
GitHub Issue: #46 Phase 6 (Profiling Infrastructure)
Agent: implementer
Design Patterns:
See library-design-patterns skill for standardized design patterns.
See state-management-patterns skill for standardized design patterns.
"""
import json
import time
import logging
import threading
import re
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any, Optional
import statistics
# Logger for profiler internals
logger = logging.getLogger(__name__)
# Default log path: <project root>/logs/performance_metrics.json
# (three .parent hops climb from .claude/lib/ to the project root)
DEFAULT_LOG_PATH = Path(__file__).parent.parent.parent / "logs" / "performance_metrics.json"
# Thread lock for safe concurrent writes
_write_lock = threading.Lock()
# Import security utilities for audit logging
try:
from .security_utils import audit_log
except ImportError:
# Fallback if security_utils not available (shouldn't happen)
def audit_log(component, action, details):
logger.warning(f"Audit log: {component}.{action}: {details}")
# Precompiled regex patterns for performance
_AGENT_NAME_PATTERN = re.compile(r'^[a-zA-Z0-9_-]+$')
_CONTROL_CHAR_PATTERN = re.compile(r'[\x00-\x1f\x7f]')
def _validate_agent_name(agent_name: str) -> str:
"""
Validate and normalize agent_name parameter.
CWE-20: Improper Input Validation
Security Requirements:
- Alphanumeric + hyphens/underscores only
- Max 256 characters
- No paths, shell chars, control chars
- Strip whitespace, normalize to lowercase
Args:
agent_name: Raw agent name input
Returns:
Normalized agent name (stripped, lowercased)
Raises:
ValueError: If agent_name contains invalid characters
"""
# Strip whitespace
agent_name = agent_name.strip()
# Check for empty string
if not agent_name:
audit_log("performance_profiler", "validation_failure", {
"parameter": "agent_name",
"error": "agent_name is required (empty string)"
})
raise ValueError("agent_name is required and cannot be empty")
# Check max length (256 chars)
if len(agent_name) > 256:
audit_log("performance_profiler", "validation_failure", {
"parameter": "agent_name",
"value": agent_name[:100],
"error": "agent_name too long (max 256 chars)"
})
raise ValueError(f"agent_name too long (max 256 chars, got {len(agent_name)})")
# Validate alphanumeric + hyphens/underscores only
# Pattern: lowercase letters, numbers, hyphens, underscores
if not _AGENT_NAME_PATTERN.match(agent_name):
audit_log("performance_profiler", "validation_failure", {
"parameter": "agent_name",
"value": agent_name[:100],
"error": "agent_name contains invalid characters"
})
raise ValueError(
f"agent_name invalid: must contain only alphanumeric characters, "
f"hyphens, and underscores. Got: {agent_name[:50]}"
)
# Normalize to lowercase
return agent_name.lower()
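
# A quick illustration of the contract above (hypothetical inputs, not taken
# from the test suite): names are stripped and lowercased, and anything
# outside [a-zA-Z0-9_-] or over 256 chars is rejected.
#
#   _validate_agent_name("  Researcher ")  # -> "researcher"
#   _validate_agent_name("test-master_2")  # -> "test-master_2"
#   _validate_agent_name("../etc/passwd")  # -> ValueError (invalid characters)
#   _validate_agent_name("a" * 300)        # -> ValueError (too long)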
def _validate_feature(feature: str) -> str:
"""
Validate and normalize feature parameter.
CWE-117: Improper Output Neutralization for Logs
Security Requirements:
- No newlines (\n, \r)
- No control characters (\x00-\x1f, \x7f)
- No tabs (\t)
- Max 10,000 characters
- Strip whitespace
Args:
feature: Raw feature description
Returns:
Normalized feature (stripped)
Raises:
ValueError: If feature contains newlines or control characters
"""
# Strip whitespace
feature = feature.strip()
# Check max length (10,000 chars)
if len(feature) > 10000:
audit_log("performance_profiler", "validation_failure", {
"parameter": "feature",
"error": "feature too long (max 10,000 chars)"
})
raise ValueError(f"feature too long (max 10,000 chars, got {len(feature)})")
# Reject newlines (\n, \r)
if '\n' in feature or '\r' in feature:
audit_log("performance_profiler", "validation_failure", {
"parameter": "feature",
"value": feature[:100],
"error": "feature contains newline characters"
})
raise ValueError(
"feature invalid: cannot contain newline characters (CWE-117 log injection)"
)
# Reject tabs (\t)
if '\t' in feature:
audit_log("performance_profiler", "validation_failure", {
"parameter": "feature",
"value": feature[:100],
"error": "feature contains tab characters"
})
raise ValueError(
"feature invalid: cannot contain tab characters (CWE-117 log injection)"
)
# Reject control characters (\x00-\x1f, \x7f)
# Pattern matches any control character
if _CONTROL_CHAR_PATTERN.search(feature):
audit_log("performance_profiler", "validation_failure", {
"parameter": "feature",
"value": feature[:100],
"error": "feature contains control characters"
})
raise ValueError(
"feature invalid: cannot contain control characters (CWE-117 log injection)"
)
# Feature is valid
return feature
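
# Illustration of the feature checks above (hypothetical inputs):
#
#   _validate_feature("Add user auth")  # -> "Add user auth"
#   _validate_feature("line1\nline2")   # -> ValueError (newline, CWE-117)
#   _validate_feature("col1\tcol2")     # -> ValueError (tab, CWE-117)
#   _validate_feature("bad\x07bell")    # -> ValueError (control character)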
def _validate_log_path(log_path: Path) -> Path:
"""
Validate log_path parameter.
CWE-22: Path Traversal
Security Requirements:
- Must be within logs/ directory (whitelist)
- Must have .json extension (lowercase)
- No parent directory references (..)
- No hidden files (starting with .)
- No special files (/dev/null, CON, PRN)
- Max 4,096 characters
Args:
log_path: Raw log path input
Returns:
Resolved canonical path
Raises:
ValueError: If log_path is outside logs/ directory
"""
# Resolve to canonical path (resolves symlinks and relative paths)
try:
resolved_path = log_path.resolve()
except Exception as e:
audit_log("performance_profiler", "validation_failure", {
"parameter": "log_path",
"value": str(log_path),
"error": f"Cannot resolve path: {e}"
})
raise ValueError(f"log_path invalid: cannot resolve path: {e}")
# Check max path length (4,096 chars)
if len(str(resolved_path)) > 4096:
audit_log("performance_profiler", "validation_failure", {
"parameter": "log_path",
"value": str(log_path)[:100],
"error": "log_path too long (max 4,096 chars)"
})
raise ValueError(f"log_path too long (max 4,096 chars, got {len(str(resolved_path))})")
    # Whitelist validation: must be inside a logs/ directory (flexible for tests)
    # Check whether any path component is named 'logs'
    has_logs_parent = any(part == "logs" for part in resolved_path.parts)
    if not has_logs_parent:
        # Project root is three levels up from this file (.claude/lib/)
        project_root = Path(__file__).parent.parent.parent.resolve()
        logs_dir = (project_root / "logs").resolve()
        audit_log("performance_profiler", "validation_failure", {
            "parameter": "log_path",
            "value": str(log_path),
            "error": "log_path outside any logs/ directory"
        })
        raise ValueError(
            f"log_path invalid: must be within a logs/ directory "
            f"(e.g. {logs_dir}). Got: {resolved_path}"
        )
# Enforce .json extension (lowercase only)
if resolved_path.suffix != '.json':
audit_log("performance_profiler", "validation_failure", {
"parameter": "log_path",
"value": str(log_path),
"error": "log_path must have .json extension"
})
raise ValueError(
f"log_path invalid: must have .json extension (lowercase). "
f"Got: {resolved_path.suffix}"
)
# Reject hidden files (starting with .)
if any(part.startswith('.') for part in resolved_path.parts):
audit_log("performance_profiler", "validation_failure", {
"parameter": "log_path",
"value": str(log_path),
"error": "log_path cannot be hidden file"
})
raise ValueError(
f"log_path invalid: cannot be hidden file (starting with .)"
)
# Reject special files
special_files = {'/dev/null', '/dev/zero', '/dev/random', 'CON', 'PRN', 'AUX', 'NUL'}
if resolved_path.name.upper() in special_files or str(resolved_path) in special_files:
audit_log("performance_profiler", "validation_failure", {
"parameter": "log_path",
"value": str(log_path),
"error": "log_path cannot be special file"
})
raise ValueError(
f"log_path invalid: cannot be special file ({resolved_path.name})"
)
# Check for null bytes in path string
if '\x00' in str(log_path):
audit_log("performance_profiler", "validation_failure", {
"parameter": "log_path",
"value": str(log_path)[:100],
"error": "log_path contains null bytes"
})
raise ValueError(
f"log_path invalid: cannot contain null bytes (CWE-22 path traversal)"
)
    # Path is valid; return the resolved canonical form (as documented above)
    return resolved_path
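
# Illustration of the path whitelist above (hypothetical paths; the hard
# requirements are a 'logs' path component and a lowercase .json suffix):
#
#   _validate_log_path(Path("logs/perf.json"))       # -> OK
#   _validate_log_path(Path("/tmp/logs/run1.json"))  # -> OK ('logs' component)
#   _validate_log_path(Path("/tmp/perf.json"))       # -> ValueError (no logs/)
#   _validate_log_path(Path("logs/perf.JSON"))       # -> ValueError (uppercase suffix)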
class PerformanceTimer:
"""
Context manager for timing agent execution.
Captures start time, end time, duration, and metadata (agent name, feature).
Optionally logs metrics to JSON file.
Example:
with PerformanceTimer("researcher", "Add auth", log_to_file=True) as timer:
do_work()
print(f"Duration: {timer.duration:.2f}s")
"""
def __init__(
self,
agent_name: str,
feature: str,
log_to_file: bool = False,
log_path: Optional[Path] = None
):
"""
Initialize performance timer with security validation.
Args:
agent_name: Name of agent being timed (validated: CWE-20)
feature: Feature description (validated: CWE-117)
log_to_file: Whether to log metrics to JSON file
log_path: Optional custom log file path (validated: CWE-22)
Raises:
ValueError: If any parameter fails security validation
"""
# Validate and normalize inputs (CWE-20, CWE-117, CWE-22)
self.agent_name = _validate_agent_name(agent_name)
self.feature = _validate_feature(feature)
# Set logging configuration
self.log_to_file = log_to_file
# Validate log_path if provided (CWE-22)
if log_path is not None:
self.log_path = _validate_log_path(log_path)
else:
self.log_path = DEFAULT_LOG_PATH
        # Note: the full validated feature string (up to 10,000 chars) is
        # preserved on the instance; as_dict() truncates only the JSON output
        # Timing attributes (set during execution)
        self._start_time_perf: Optional[float] = None  # perf_counter value
        self._end_time_perf: Optional[float] = None
        self.start_time: Optional[str] = None  # ISO 8601 timestamp string
        self.end_time: Optional[str] = None
        self.start_timestamp: Optional[str] = None  # Alias for compatibility
        self.end_timestamp: Optional[str] = None
        self.timestamp: Optional[str] = None  # start_time with "Z" suffix (set on exit)
        self.duration: Optional[float] = None
        self.success: bool = True  # Assume success unless exception
        self.error: Optional[str] = None  # Error message if exception
def __enter__(self):
"""Start timing when entering context."""
self._start_time_perf = time.perf_counter()
# Use local time (datetime.now()) for compatibility with tests
self.start_time = datetime.now().isoformat()
self.start_timestamp = self.start_time # Alias for compatibility
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""
Stop timing when exiting context.
Args:
exc_type: Exception type (if exception occurred)
exc_val: Exception value
exc_tb: Exception traceback
"""
self._end_time_perf = time.perf_counter()
self.end_time = datetime.now().isoformat()
self.end_timestamp = self.end_time # Alias for compatibility
self.duration = self._end_time_perf - self._start_time_perf
        # Append a "Z" suffix for ISO 8601 UTC-style compatibility. Note that
        # datetime.now() is local time, so the suffix is a format marker only.
        self.timestamp = (self.start_time + "Z") if not self.start_time.endswith("Z") else self.start_time
# Handle negative duration (clock skew) - should never happen with perf_counter
if self.duration < 0:
logger.warning(f"Negative duration detected: {self.duration}s. Setting to 0.")
self.duration = 0.0
# Mark as failure if exception occurred
if exc_type is not None:
self.success = False
self.error = str(exc_val) if exc_val else "Unknown error"
# Log to file if requested
if self.log_to_file:
try:
self._write_to_log()
except Exception as e:
# Don't let logging errors break the main workflow
logger.error(f"Failed to write performance metrics: {e}")
return False # Don't suppress exceptions
def as_dict(self) -> Dict[str, Any]:
"""
Convert timer data to dictionary for JSON serialization.
Truncates feature to 500 chars to prevent log bloat.
Returns:
Dict with agent_name, feature (truncated), duration, timestamp, success
"""
# Truncate feature to 500 chars for JSON output to prevent log bloat
feature_for_json = self.feature[:500] if len(self.feature) > 500 else self.feature
return {
"agent_name": self.agent_name,
"feature": feature_for_json,
"duration": self.duration,
"timestamp": self.timestamp, # ISO 8601 with Z suffix
"start_time": self.start_timestamp,
"end_time": self.end_timestamp,
"success": self.success
}
def to_json(self) -> str:
"""
Convert timer data to JSON string.
Returns:
JSON string representation
"""
return json.dumps(self.as_dict())
def _write_to_log(self):
"""
Write metrics to JSON log file (newline-delimited JSON format).
Thread-safe with file lock. Creates logs/ directory if needed.
Includes defensive validation of log_path (defense-in-depth).
"""
# Defense-in-depth: Re-validate log_path before write
# This protects against potential log_path modification after __init__
validated_path = _validate_log_path(self.log_path)
# Ensure logs directory exists
validated_path.parent.mkdir(parents=True, exist_ok=True)
# Thread-safe write
with _write_lock:
with open(validated_path, "a") as f:
f.write(self.to_json() + "\n")
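
# Each _write_to_log() call appends one newline-delimited JSON record. A
# representative line (illustrative values only) looks like:
#
#   {"agent_name": "researcher", "feature": "Add user auth", "duration": 12.34,
#    "timestamp": "2025-11-08T10:15:30.123456Z", "start_time": "2025-11-08T10:15:30.123456",
#    "end_time": "2025-11-08T10:15:42.463456", "success": true}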
def calculate_aggregate_metrics(durations: List[float]) -> Dict[str, float]:
"""
Calculate aggregate metrics (min, max, avg, p95, count) from duration samples.
Args:
durations: List of duration values in seconds
Returns:
Dict with keys: min, max, avg, p95, count
Raises:
ValueError: If durations list is empty
    Example:
        durations = [10.0, 20.0, 30.0, 40.0, 50.0]
        metrics = calculate_aggregate_metrics(durations)
        # {'min': 10.0, 'max': 50.0, 'avg': 30.0, 'p95': 50.0, 'count': 5}
        # (nearest-rank method: small samples return an actual observation,
        # unlike interpolating estimators, which would give 48.0 here)
    """
    if not durations:
        raise ValueError("Cannot calculate metrics for empty duration list")
    # Calculate p95 with a nearest-rank approximation: index the sorted
    # samples at 95% of the list length, clamped to the last element
    if len(durations) == 1:
        p95 = durations[0]
    else:
        sorted_durations = sorted(durations)
        p95_index = int(len(sorted_durations) * 0.95)
        p95 = sorted_durations[min(p95_index, len(sorted_durations) - 1)]
return {
"min": min(durations),
"max": max(durations),
"avg": statistics.mean(durations),
"p95": p95,
"count": len(durations)
}
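
# Worked example for the nearest-rank p95 above: with 20 sorted samples,
# int(20 * 0.95) = 19 selects the largest observation; with 100 samples,
# index 95 selects the 96th smallest. Small samples therefore bias p95
# toward the maximum.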
def load_metrics_from_log(log_path: Optional[Path] = None, skip_corrupted: bool = True) -> List[Dict[str, Any]]:
"""
Load all metrics from JSON log file.
Args:
log_path: Optional custom log file path (Path or str)
skip_corrupted: If True, skip corrupted lines; if False, raise exception
Returns:
List of metric dictionaries
    Raises:
        FileNotFoundError: If log file doesn't exist and skip_corrupted=False
            (with skip_corrupted=True, a missing file returns an empty list)
        JSONDecodeError: If log contains invalid JSON and skip_corrupted=False
"""
# Convert string to Path if needed
if isinstance(log_path, str):
log_path = Path(log_path)
log_path = log_path or DEFAULT_LOG_PATH
metrics = []
try:
with open(log_path, "r") as f:
for line_num, line in enumerate(f, start=1):
line = line.strip()
if not line:
continue # Skip empty lines
try:
metrics.append(json.loads(line))
except json.JSONDecodeError as e:
if skip_corrupted:
logger.warning(f"Skipping invalid JSON at line {line_num}: {e}")
continue
else:
raise
except FileNotFoundError:
if skip_corrupted:
return []
raise
return metrics
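
# Minimal usage sketch (assumes an NDJSON log written by PerformanceTimer):
#
#   entries = load_metrics_from_log(Path("logs/performance_metrics.json"))
#   print(f"Loaded {len(entries)} records")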
def aggregate_metrics_by_agent(
metrics: List[Dict[str, Any]],
agent_name: Optional[str] = None
) -> Dict[str, Dict[str, float]]:
"""
Aggregate metrics by agent name.
Args:
metrics: List of metric dictionaries from log
agent_name: Optional agent name filter (if None, aggregate all agents)
Returns:
Dict mapping agent_name to aggregate metrics {min, max, avg, p95}
Example:
metrics = load_metrics_from_log()
aggregates = aggregate_metrics_by_agent(metrics)
print(aggregates["researcher"]["avg"]) # Average researcher time
"""
# Group durations by agent
agent_durations: Dict[str, List[float]] = {}
for metric in metrics:
agent = metric.get("agent_name")
duration = metric.get("duration")
# Skip invalid metrics
if not agent or duration is None:
continue
# Filter by agent_name if specified
if agent_name and agent != agent_name:
continue
if agent not in agent_durations:
agent_durations[agent] = []
agent_durations[agent].append(duration)
# Calculate aggregates for each agent
aggregates = {}
for agent, durations in agent_durations.items():
if durations: # Only calculate if we have data
aggregates[agent] = calculate_aggregate_metrics(durations)
return aggregates
def generate_performance_report(
metrics: List[Dict[str, Any]],
feature: Optional[str] = None
) -> str:
"""
Generate human-readable performance report.
Args:
metrics: List of metric dictionaries
feature: Optional feature name for report title
Returns:
Formatted performance report as string
Example:
metrics = load_metrics_from_log()
report = generate_performance_report(metrics, "Add user auth")
print(report)
"""
if not metrics:
return "No performance data available."
# Aggregate by agent
aggregates = aggregate_metrics_by_agent(metrics)
if not aggregates:
return "No valid metrics found."
# Build report
lines = []
if feature:
lines.append(f"Performance Report: {feature}")
lines.append("=" * (len(feature) + 20))
else:
lines.append("Performance Report")
lines.append("==================")
lines.append("")
# Sort agents by average time (slowest first)
sorted_agents = sorted(
aggregates.items(),
key=lambda x: x[1]["avg"],
reverse=True
)
for agent_name, agent_metrics in sorted_agents:
lines.append(f"{agent_name}:")
lines.append(f" Min: {agent_metrics['min']:.2f}s")
lines.append(f" Max: {agent_metrics['max']:.2f}s")
lines.append(f" Avg: {agent_metrics['avg']:.2f}s")
lines.append(f" P95: {agent_metrics['p95']:.2f}s")
lines.append("")
    # Calculate total time across entries that carry a numeric duration
    total_time = sum(m["duration"] for m in metrics if m.get("duration") is not None)
lines.append(f"Total Time: {total_time:.2f}s")
return "\n".join(lines)
# Convenience functions
def aggregate_by_agent(timer_results: List[Dict[str, Any]]) -> Dict[str, Dict[str, float]]:
"""
Aggregate metrics by agent name (alias for aggregate_metrics_by_agent).
Args:
timer_results: List of timer result dictionaries
Returns:
Dict mapping agent_name to aggregate metrics {min, max, avg, p95}
Example:
results = [{"agent_name": "researcher", "duration": 10.0}, ...]
aggregates = aggregate_by_agent(results)
"""
return aggregate_metrics_by_agent(timer_results, agent_name=None)
def generate_summary_report(metrics_by_agent: Dict[str, Dict[str, float]]) -> str:
"""
Generate human-readable summary report from aggregated metrics.
Args:
metrics_by_agent: Dict mapping agent_name to metrics dict
Returns:
Formatted string report
Example:
metrics = {"researcher": {"min": 10.0, "max": 20.0, "avg": 15.0, "p95": 18.0}}
report = generate_summary_report(metrics)
"""
if not metrics_by_agent:
return "No metrics available."
lines = []
lines.append("Performance Summary")
lines.append("=" * 50)
lines.append("")
# Sort by average time (slowest first)
sorted_agents = sorted(
metrics_by_agent.items(),
key=lambda x: x[1].get("avg", 0),
reverse=True
)
for agent_name, metrics in sorted_agents:
lines.append(f"{agent_name}:")
lines.append(f" Min: {metrics['min']:.2f}s")
lines.append(f" Max: {metrics['max']:.2f}s")
lines.append(f" Average: {metrics['avg']:.2f}s")
lines.append(f" P95: {metrics['p95']:.2f}s")
if "count" in metrics:
lines.append(f" Count: {metrics['count']}")
lines.append("")
return "\n".join(lines)
def identify_bottlenecks(
metrics_by_agent: Dict[str, Dict[str, float]],
baseline_minutes: Optional[Dict[str, float]] = None,
threshold_multiplier: float = 1.5
) -> List[str]:
"""
Identify performance bottlenecks compared to baseline expectations.
Args:
metrics_by_agent: Dict mapping agent_name to metrics
baseline_minutes: Optional dict mapping agent_name to baseline time in SECONDS (despite name)
threshold_multiplier: Multiplier for baseline to determine bottleneck (default 1.5x)
Returns:
List of agent names that are bottlenecks
Example:
metrics = {"researcher": {"avg": 20.0}, "planner": {"avg": 120.0}}
baselines = {"researcher": 10.0, "planner": 60.0} # seconds (despite parameter name)
bottlenecks = identify_bottlenecks(metrics, baselines)
# Returns: ["planner"] (120s > 60s)
"""
if not metrics_by_agent:
return []
bottlenecks = []
if baseline_minutes:
# Treat baseline_minutes values as seconds (parameter name is misleading)
for agent_name, metrics in metrics_by_agent.items():
avg_seconds = metrics.get("avg", 0)
if agent_name not in baseline_minutes:
continue
# Use baseline value directly as seconds threshold
baseline_threshold = baseline_minutes[agent_name]
# If actual time exceeds baseline threshold, it's a bottleneck
if avg_seconds > baseline_threshold:
bottlenecks.append(agent_name)
else:
# Use percentile approach if no baseline provided
avg_times = [m.get("avg", 0) for m in metrics_by_agent.values()]
if not avg_times:
return []
# 75th percentile threshold
sorted_times = sorted(avg_times)
threshold_index = int(len(sorted_times) * 0.75)
threshold = sorted_times[min(threshold_index, len(sorted_times) - 1)]
# Find agents exceeding threshold
bottlenecks = [
agent_name
for agent_name, metrics in metrics_by_agent.items()
if metrics.get("avg", 0) >= threshold
]
return bottlenecks
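
# Sketch of both detection modes above (hypothetical metrics):
#
#   m = {"researcher": {"avg": 8.0}, "planner": {"avg": 120.0}}
#   identify_bottlenecks(m, {"researcher": 10.0, "planner": 60.0})
#   # -> ["planner"]  (baseline mode: 120s > 60s)
#   identify_bottlenecks(m)
#   # -> ["planner"]  (percentile mode: avg >= 75th-percentile threshold)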
def measure_profiler_overhead(iterations: int = 1000) -> float:
"""
Measure profiling overhead as percentage of execution time.
Args:
iterations: Number of iterations to test
Returns:
Overhead percentage (e.g., 2.5 means 2.5% overhead)
Example:
overhead = measure_profiler_overhead()
print(f"Profiling overhead: {overhead:.2f}%")
"""
# Baseline (no profiling)
start = time.perf_counter()
for _ in range(iterations):
time.sleep(0.0001) # Simulate tiny work
baseline_duration = time.perf_counter() - start
# With profiling
start = time.perf_counter()
for _ in range(iterations):
with PerformanceTimer("test", "overhead", log_to_file=False):
time.sleep(0.0001)
profiled_duration = time.perf_counter() - start
# Calculate overhead percentage
overhead = ((profiled_duration - baseline_duration) / baseline_duration) * 100
return overhead
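
# Note: the measured overhead depends on timer resolution and the 0.1ms
# sleep granularity of the synthetic workload above, so treat the returned
# percentage as a rough estimate rather than a precise cost.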
# Type alias for PerformanceMetrics (backwards compatibility)
PerformanceMetrics = Dict[str, Dict[str, float]]
def analyze_performance_logs(
log_path: Optional[Path] = None,
skip_corrupted: bool = True
) -> Dict[str, Any]:
"""
Analyze performance logs and return aggregate metrics per agent with bottleneck detection.
This is a convenience function that combines load_metrics_from_log(),
aggregate_metrics_by_agent(), and bottleneck detection into a single call.
Args:
log_path: Path to performance log file (defaults to logs/performance_metrics.json)
skip_corrupted: If True, skip corrupted JSON entries instead of raising
Returns:
Dict with:
- Per-agent metrics: {agent_name: {min, max, avg, p95, count}}
- top_slowest_agents: List of top 3 slowest agents with avg_duration
Example: {
"researcher": {"min": 5.0, "max": 15.0, "avg": 10.0, "p95": 14.5, "count": 4},
"planner": {"min": 10.0, "max": 20.0, "avg": 15.0, "p95": 19.0, "count": 4},
"top_slowest_agents": [
{"agent_name": "implementer", "avg_duration": 37.0},
{"agent_name": "test-master", "avg_duration": 27.0},
{"agent_name": "reviewer", "avg_duration": 22.0}
]
}
    Raises:
        FileNotFoundError: If the log file doesn't exist and skip_corrupted=False
            (with skip_corrupted=True, a missing file yields an empty result)
    Example:
        # Analyze default log file
        metrics = analyze_performance_logs()
        print(f"Researcher avg: {metrics['researcher']['avg']:.2f}s")
        print(f"Slowest agent: {metrics['top_slowest_agents'][0]['agent_name']}")

        # Analyze custom log file
        metrics = analyze_performance_logs(Path("/tmp/perf.json"))
    Security:
        - Safe JSON parsing (no arbitrary code execution)
        - Gracefully handles corrupted entries (skip_corrupted=True)
        - Note: reads are not path-restricted here; the CWE-22 log_path
          validation applies to writes performed by PerformanceTimer
Performance:
- O(n) where n is number of log entries
- < 100ms for 1000 entries on typical hardware
Date: 2025-11-13
Issue: #46 Phase 8.5 (Profiler Integration)
"""
# Load metrics from log file
metrics_list = load_metrics_from_log(log_path=log_path, skip_corrupted=skip_corrupted)
# Aggregate metrics by agent
aggregates = aggregate_metrics_by_agent(metrics_list)
# If no data, return empty dict
if not aggregates:
return {}
# Identify top 3 slowest agents by avg duration
agent_avg_durations = [
{"agent_name": agent_name, "avg_duration": metrics["avg"]}
for agent_name, metrics in aggregates.items()
]
# Sort by avg_duration descending, take top 3
agent_avg_durations.sort(key=lambda x: x["avg_duration"], reverse=True)
top_slowest = agent_avg_durations[:3]
# Add top_slowest_agents to result
result = dict(aggregates)
result["top_slowest_agents"] = top_slowest
return result
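
if __name__ == "__main__":
    # Minimal end-to-end sketch, assuming the default log location exists and
    # is writable; agent and feature names here are illustrative only.
    with PerformanceTimer("demo-agent", "Profiler smoke test", log_to_file=True):
        time.sleep(0.05)
    print(generate_performance_report(load_metrics_from_log(skip_corrupted=True)))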