523 lines
16 KiB
Python
523 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Error Analyzer Library - Analyze captured tool errors for GitHub issue creation.
|
|
|
|
Reads error registry from .claude/logs/errors/, classifies errors using
|
|
failure_classifier.py, deduplicates via fingerprinting, and returns
|
|
structured reports for actionable errors.
|
|
|
|
Key Features:
|
|
1. Error registry reading from JSONL files
|
|
2. Integration with failure_classifier.py for transient/permanent classification
|
|
3. Error fingerprinting for deduplication
|
|
4. Filtering for actionable errors (permanent only, not transient)
|
|
5. Structured error reports for issue creation
|
|
|
|
Security:
|
|
- CWE-117: Log injection prevention via existing sanitization
|
|
- CWE-532: Secret redaction for API keys, tokens
|
|
- CWE-22: Path validation via validation.py
|
|
- CWE-400: Resource limits (max errors per session)
|
|
|
|
Date: 2025-12-13
|
|
Issue: #124 (Automated error capture and analysis)
|
|
Agent: implementer
|
|
|
|
See error-handling-patterns skill for exception hierarchy and error handling best practices.
|
|
|
|
Design Patterns:
|
|
See library-design-patterns skill for standardized design patterns.
|
|
"""
|
|
|
|
import hashlib
|
|
import json
|
|
import re
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
# Import security utilities
|
|
try:
|
|
from .security_utils import audit_log
|
|
except ImportError:
|
|
lib_dir = Path(__file__).parent.resolve()
|
|
sys.path.insert(0, str(lib_dir))
|
|
from security_utils import audit_log
|
|
|
|
# Import failure classifier
|
|
try:
|
|
from .failure_classifier import (
|
|
classify_failure,
|
|
FailureType,
|
|
sanitize_error_message,
|
|
)
|
|
except ImportError:
|
|
from failure_classifier import (
|
|
classify_failure,
|
|
FailureType,
|
|
sanitize_error_message,
|
|
)
|
|
|
|
# Import path utilities
|
|
try:
|
|
from .path_utils import get_project_root
|
|
except ImportError:
|
|
from path_utils import get_project_root
|
|
|
|
|
|
# =============================================================================
|
|
# Constants
|
|
# =============================================================================
|
|
|
|
# Maximum errors to process per session (CWE-400 resource limit)
MAX_ERRORS_PER_SESSION = 500

# Maximum error message length (prevent memory exhaustion)
MAX_ERROR_MESSAGE_LENGTH = 1000

# Secret patterns for redaction (CWE-532).
#
# Each entry is a regular expression source string; redact_secrets() applies
# them one after another with re.IGNORECASE.  The prefixed "sk-<kind>-"
# pattern is listed before the generic "sk-" pattern because keys of that
# shape contain hyphens/underscores, which the generic alphanumeric-only
# pattern cannot match.
SECRET_PATTERNS = [
    r"sk-(?:ant|proj|svcacct|admin)-[a-zA-Z0-9_-]{20,}",  # Prefixed sk- keys (Anthropic, OpenAI project/service)
    r"sk-[a-zA-Z0-9]{20,}",  # OpenAI API key
    r"anthropic_[a-zA-Z0-9_-]{20,}",  # Anthropic API key
    r"github_pat_[a-zA-Z0-9_]{20,}",  # GitHub fine-grained PAT
    r"ghp_[a-zA-Z0-9]{20,}",  # GitHub PAT
    r"gho_[a-zA-Z0-9]{20,}",  # GitHub OAuth token
    r"ghu_[a-zA-Z0-9]{20,}",  # GitHub App user-to-server token
    r"ghs_[a-zA-Z0-9]{20,}",  # GitHub App installation token
    r"ghr_[a-zA-Z0-9]{20,}",  # GitHub refresh token
    r"Bearer\s+[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+",  # JWT
    r"api[_-]?key[\"']?\s*[=:]\s*[\"']?[a-zA-Z0-9_-]{16,}",  # Generic API key
    r"password[\"']?\s*[=:]\s*[\"']?[^\s\"']+",  # Password assignments
    r"secret[\"']?\s*[=:]\s*[\"']?[a-zA-Z0-9_-]{16,}",  # Generic secret
]
|
|
|
|
|
|
# =============================================================================
|
|
# Data Classes
|
|
# =============================================================================
|
|
|
|
class ErrorEntry:
    """Represents a single captured error.

    Attributes:
        timestamp: Timestamp string recorded with the error.
        tool_name: Name of the tool that produced the error.
        exit_code: Exit code of the tool, or None when not applicable.
        error_message: The captured error text.
        context: Free-form additional context (never None; defaults to {}).
        failure_type: Classification result, None until classified.
        fingerprint: Deduplication fingerprint, None until computed.
    """

    def __init__(
        self,
        timestamp: str,
        tool_name: str,
        exit_code: Optional[int],
        error_message: str,
        context: Optional[Dict[str, Any]] = None,
    ):
        self.timestamp = timestamp
        self.tool_name = tool_name
        self.exit_code = exit_code
        self.error_message = error_message
        self.context = context or {}
        self.failure_type: Optional[FailureType] = None
        self.fingerprint: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "timestamp": self.timestamp,
            "tool_name": self.tool_name,
            "exit_code": self.exit_code,
            "error_message": self.error_message,
            "context": self.context,
            "failure_type": self.failure_type.value if self.failure_type else None,
            "fingerprint": self.fingerprint,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ErrorEntry":
        """Create an entry from a dictionary (e.g. one parsed JSONL record).

        Missing keys and explicit ``None`` values both fall back to the
        defaults, so a record such as ``{"error_message": null}`` yields an
        empty message instead of ``None``.

        Args:
            data: Mapping with any of the keys produced by ``to_dict``.

        Returns:
            A new ErrorEntry. ``failure_type`` is left as None when the stored
            value is absent or is not a valid FailureType value.
        """
        # dict.get(key, default) only covers *missing* keys; JSON null would
        # slip through as None and break later string operations.
        raw_message = data.get("error_message")
        entry = cls(
            timestamp=data.get("timestamp") or "",
            tool_name=data.get("tool_name") or "unknown",
            exit_code=data.get("exit_code"),
            error_message="" if raw_message is None else str(raw_message),
            context=data.get("context"),
        )

        raw_type = data.get("failure_type")
        if raw_type:
            try:
                entry.failure_type = FailureType(raw_type)
            except ValueError:
                # Unknown/stale classification value: keep the record and
                # leave it unclassified rather than failing the whole load.
                entry.failure_type = None

        entry.fingerprint = data.get("fingerprint")
        return entry
|
|
|
|
|
|
class ErrorReport:
    """Outcome of one analysis run, packaged for issue creation.

    Holds the actionable and transient error lists, the fingerprints that
    were dropped as duplicates, the overall error count and the date the
    report covers.
    """

    def __init__(
        self,
        actionable_errors: List[ErrorEntry],
        transient_errors: List[ErrorEntry],
        duplicate_fingerprints: List[str],
        total_errors: int,
        session_date: str,
    ):
        self.actionable_errors = actionable_errors
        self.transient_errors = transient_errors
        self.duplicate_fingerprints = duplicate_fingerprints
        self.total_errors = total_errors
        self.session_date = session_date

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the report.

        Each contained entry is serialized through its own ``to_dict``; the
        two ``*_count`` keys are the lengths of the corresponding lists.
        """
        serialized_actionable = []
        for entry in self.actionable_errors:
            serialized_actionable.append(entry.to_dict())

        serialized_transient = []
        for entry in self.transient_errors:
            serialized_transient.append(entry.to_dict())

        payload: Dict[str, Any] = {}
        payload["actionable_errors"] = serialized_actionable
        payload["transient_errors"] = serialized_transient
        payload["duplicate_fingerprints"] = self.duplicate_fingerprints
        payload["total_errors"] = self.total_errors
        payload["session_date"] = self.session_date
        payload["actionable_count"] = len(self.actionable_errors)
        payload["transient_count"] = len(self.transient_errors)
        return payload
|
|
|
|
|
|
# =============================================================================
|
|
# Error Analyzer
|
|
# =============================================================================
|
|
|
|
class ErrorAnalyzer:
    """Analyzes captured errors for GitHub issue creation.

    The pipeline (see ``analyze``) reads one day's JSONL registry file,
    classifies every entry, drops entries whose fingerprint was already seen
    in this run, and splits the remainder into actionable and transient lists.
    """

    # Registry files are named "<YYYY-MM-DD>.jsonl"; anything else is rejected
    # so a caller-supplied date can never address a path outside errors_dir.
    _DATE_PATTERN = re.compile(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")

    def __init__(self, project_root: Optional[Path] = None):
        """
        Initialize error analyzer.

        Args:
            project_root: Project root directory (auto-detected if not provided)
        """
        if project_root is None:
            project_root = get_project_root()
        self.project_root = Path(project_root)
        self.errors_dir = self.project_root / ".claude" / "logs" / "errors"
        self._seen_fingerprints: set = set()

    def read_error_registry(self, date: Optional[str] = None) -> List[ErrorEntry]:
        """
        Read errors from registry for a specific date.

        At most MAX_ERRORS_PER_SESSION physical lines of the file are
        examined (blank and malformed lines count toward that limit).
        Lines that are not valid JSON objects, or that cannot be turned into
        an ErrorEntry, are skipped.

        Args:
            date: Date string (YYYY-MM-DD). If None, uses today.

        Returns:
            List of ErrorEntry objects. Empty when the date is not in
            YYYY-MM-DD form, the file does not exist, or it cannot be read.
        """
        if date is None:
            date = datetime.now().strftime("%Y-%m-%d")

        # CWE-22: the date becomes part of a file name, so refuse anything
        # that is not strictly digits and dashes (e.g. "../../secrets").
        if not isinstance(date, str) or not self._DATE_PATTERN.fullmatch(date):
            audit_log(
                "error_analyzer_invalid_date",
                "failure",
                {"date": repr(date)[:100]},
            )
            return []

        error_file = self.errors_dir / f"{date}.jsonl"

        if not error_file.exists():
            return []

        errors = []
        try:
            # errors="replace": a few corrupt bytes should cost one line at
            # most, not abort the whole read with UnicodeDecodeError.
            with open(error_file, "r", encoding="utf-8", errors="replace") as f:
                for i, line in enumerate(f):
                    if i >= MAX_ERRORS_PER_SESSION:
                        audit_log(
                            "error_analyzer_limit_reached",
                            "warning",
                            {"max": MAX_ERRORS_PER_SESSION, "file": str(error_file)},
                        )
                        break

                    line = line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        continue  # Skip malformed lines

                    # Valid JSON that is not an object ([], "x", 1, null)
                    # cannot describe an error entry.
                    if not isinstance(data, dict):
                        continue

                    try:
                        errors.append(ErrorEntry.from_dict(data))
                    except (TypeError, ValueError):
                        continue  # Skip records with unusable field values

        except (OSError, IOError) as e:
            audit_log(
                "error_analyzer_read_failed",
                "failure",
                {"file": str(error_file), "error": str(e)},
            )

        return errors

    def classify_errors(self, errors: List[ErrorEntry]) -> List[ErrorEntry]:
        """
        Classify errors as transient or permanent.

        Args:
            errors: List of errors to classify

        Returns:
            Same list with failure_type populated
        """
        for error in errors:
            # A None message (possible for hand-built entries) is classified
            # as the empty string.
            error.failure_type = classify_failure(error.error_message or "")
        return errors

    def create_fingerprint(self, error: ErrorEntry) -> str:
        """
        Create unique fingerprint for error deduplication.

        Fingerprint = hash(tool_name + error_type + normalized_message)

        The message is lowercased, every digit run is replaced by "N",
        whitespace runs are collapsed to one space, and the result is cut to
        200 characters, so messages differing only in numbers or spacing
        share a fingerprint.

        Args:
            error: Error to fingerprint

        Returns:
            SHA-256 fingerprint (first 16 chars)
        """
        message = error.error_message
        normalized = ("" if message is None else str(message)).lower()
        normalized = re.sub(r"\d+", "N", normalized)  # Replace numbers
        normalized = re.sub(r"\s+", " ", normalized)  # Collapse whitespace
        normalized = normalized[:200]  # Cap length for hashing

        type_label = error.failure_type.value if error.failure_type else "unknown"
        fingerprint_input = f"{error.tool_name}:{type_label}:{normalized}"

        # Hash and truncate
        return hashlib.sha256(fingerprint_input.encode("utf-8")).hexdigest()[:16]

    def deduplicate_errors(self, errors: List[ErrorEntry]) -> Tuple[List[ErrorEntry], List[str]]:
        """
        Remove duplicate errors based on fingerprints.

        Every error (kept or dropped) gets its ``fingerprint`` attribute set.
        Fingerprints are remembered on the instance, so repeated calls
        without ``analyze`` (which clears them) deduplicate across calls.

        Args:
            errors: List of errors to deduplicate

        Returns:
            Tuple of (unique errors, duplicate fingerprints). A fingerprint
            appears in the second list once per dropped error.
        """
        unique = []
        duplicates = []

        for error in errors:
            fingerprint = self.create_fingerprint(error)
            error.fingerprint = fingerprint

            if fingerprint in self._seen_fingerprints:
                duplicates.append(fingerprint)
            else:
                self._seen_fingerprints.add(fingerprint)
                unique.append(error)

        return unique, duplicates

    def filter_actionable(self, errors: List[ErrorEntry]) -> Tuple[List[ErrorEntry], List[ErrorEntry]]:
        """
        Filter for actionable errors (permanent only).

        Anything whose failure_type is not FailureType.PERMANENT, including
        unclassified entries, lands in the second list.

        Args:
            errors: List of classified errors

        Returns:
            Tuple of (actionable errors, transient errors)
        """
        actionable = []
        transient = []

        for error in errors:
            if error.failure_type == FailureType.PERMANENT:
                actionable.append(error)
            else:
                transient.append(error)

        return actionable, transient

    def analyze(self, date: Optional[str] = None) -> ErrorReport:
        """
        Full analysis pipeline: read, classify, deduplicate, filter.

        Args:
            date: Date to analyze (default: today)

        Returns:
            ErrorReport with actionable and transient errors
        """
        if date is None:
            date = datetime.now().strftime("%Y-%m-%d")

        # Reset fingerprints for new analysis
        self._seen_fingerprints.clear()

        # Pipeline
        errors = self.read_error_registry(date)
        errors = self.classify_errors(errors)
        errors, duplicates = self.deduplicate_errors(errors)
        actionable, transient = self.filter_actionable(errors)

        # "errors" now holds only the unique entries, so add the dropped
        # duplicates back to report how many were read in total.
        total = len(errors) + len(duplicates)

        audit_log(
            "error_analysis_complete",
            "success",
            {
                "date": date,
                "total": total,
                "actionable": len(actionable),
                "transient": len(transient),
                "duplicates": len(duplicates),
            },
        )

        return ErrorReport(
            actionable_errors=actionable,
            transient_errors=transient,
            duplicate_fingerprints=duplicates,
            total_errors=total,
            session_date=date,
        )
|
|
|
|
|
|
# =============================================================================
|
|
# Utility Functions
|
|
# =============================================================================
|
|
|
|
def redact_secrets(message: str) -> str:
    """
    Replace anything that looks like a credential with "[REDACTED]".

    Every pattern in SECRET_PATTERNS is applied in list order,
    case-insensitively, each one operating on the output of the previous.

    Args:
        message: Error message that may contain secrets

    Returns:
        Message with secrets redacted
    """
    scrubbed = message
    for secret_regex in SECRET_PATTERNS:
        matcher = re.compile(secret_regex, re.IGNORECASE)
        scrubbed = matcher.sub("[REDACTED]", scrubbed)
    return scrubbed
|
|
|
|
|
|
def format_error_for_issue(error: ErrorEntry) -> str:
    """
    Format error for GitHub issue body.

    Both the error message and the serialized context are passed through
    redact_secrets() before being embedded, then capped
    (MAX_ERROR_MESSAGE_LENGTH characters for the message, 500 for the
    context JSON).

    Args:
        error: Error to format

    Returns:
        Markdown-formatted error description
    """
    # Redact BEFORE truncating: cutting first can split a secret in two so
    # that no pattern matches any more and its leading part is published.
    safe_message = redact_secrets(error.error_message or "")[:MAX_ERROR_MESSAGE_LENGTH]

    exit_code = error.exit_code if error.exit_code is not None else "N/A"
    failure_type = error.failure_type.value if error.failure_type else "unknown"

    lines = [
        "### Error Details",
        "",
        f"**Tool**: {error.tool_name}",
        f"**Exit Code**: {exit_code}",
        f"**Type**: {failure_type}",
        f"**Fingerprint**: `{error.fingerprint}`",
        f"**Timestamp**: {error.timestamp}",
        "",
        "### Error Message",
        "```",
        safe_message,
        "```",
    ]

    if error.context:
        # default=str keeps formatting from failing on values json cannot
        # encode natively (paths, datetimes, ...); the text is only displayed.
        context_json = json.dumps(error.context, indent=2, default=str)
        # Context is free-form and may carry credentials (commands, env
        # values), so it gets the same redaction as the message.
        safe_context = redact_secrets(context_json)[:500]
        lines.extend([
            "",
            "### Context",
            "```json",
            safe_context,
            "```",
        ])

    return "\n".join(lines)
|
|
|
|
|
|
def write_error_to_registry(
    tool_name: str,
    exit_code: Optional[int],
    error_message: str,
    context: Optional[Dict[str, Any]] = None,
    project_root: Optional[Path] = None,
) -> bool:
    """
    Write an error to the registry (JSONL format).

    The record is appended to
    ``<project_root>/.claude/logs/errors/<YYYY-MM-DD>.jsonl``. The message is
    sanitized, secret-redacted and capped at MAX_ERROR_MESSAGE_LENGTH
    characters (plus a "...[truncated]" marker) before being stored.

    Args:
        tool_name: Name of the tool that failed
        exit_code: Exit code (None if not applicable)
        error_message: Error message
        context: Additional context (stored as given; not redacted here)
        project_root: Project root (auto-detected if not provided)

    Returns:
        True if written successfully, False otherwise (directory creation,
        serialization and file write failures are all reported as False)
    """
    if project_root is None:
        project_root = get_project_root()

    errors_dir = Path(project_root) / ".claude" / "logs" / "errors"

    # One clock reading for both the file name and the record, so an entry
    # written around midnight cannot land in a file for a different day.
    now = datetime.now()
    error_file = errors_dir / f"{now.strftime('%Y-%m-%d')}.jsonl"

    # Sanitize and truncate message
    safe_message = sanitize_error_message(error_message)
    safe_message = redact_secrets(safe_message)
    if len(safe_message) > MAX_ERROR_MESSAGE_LENGTH:
        safe_message = safe_message[:MAX_ERROR_MESSAGE_LENGTH] + "...[truncated]"

    entry = {
        "timestamp": now.isoformat(),
        "tool_name": tool_name,
        "exit_code": exit_code,
        "error_message": safe_message,
        "context": context or {},
    }

    try:
        # Directory creation is inside the try so that a permission or disk
        # problem yields False, as the contract promises, instead of raising.
        errors_dir.mkdir(parents=True, exist_ok=True)

        # default=str: context is caller-supplied and may hold values json
        # cannot encode natively; recording them as text beats losing the entry.
        record = json.dumps(entry, default=str)

        with open(error_file, "a", encoding="utf-8") as f:
            f.write(record + "\n")

        audit_log(
            "error_written_to_registry",
            "success",
            {"tool": tool_name, "file": str(error_file)},
        )
        return True

    except (OSError, TypeError, ValueError) as e:
        # TypeError/ValueError: json.dumps on unsupported dict keys or
        # circular references in context.
        audit_log(
            "error_write_failed",
            "failure",
            {"tool": tool_name, "error": str(e)},
        )
        return False
|
|
|
|
|
|
# =============================================================================
|
|
# Module-level convenience functions
|
|
# =============================================================================
|
|
|
|
def analyze_errors(date: Optional[str] = None, project_root: Optional[Path] = None) -> ErrorReport:
    """
    Run the full analysis pipeline for one date with a throwaway analyzer.

    Args:
        date: Date to analyze (default: today)
        project_root: Project root (auto-detected if not provided)

    Returns:
        ErrorReport with analysis results
    """
    return ErrorAnalyzer(project_root).analyze(date)
|
|
|
|
|
|
def get_actionable_errors(date: Optional[str] = None, project_root: Optional[Path] = None) -> List[ErrorEntry]:
    """
    Analyze a date and hand back just the actionable (permanent) errors.

    Args:
        date: Date to analyze (default: today)
        project_root: Project root (auto-detected if not provided)

    Returns:
        List of actionable ErrorEntry objects
    """
    return analyze_errors(date, project_root).actionable_errors
|