485 lines
16 KiB
Python
485 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
GitHub Issue Fetcher - Fetch issue titles via gh CLI for batch processing.
|
|
|
|
Provides secure GitHub issue fetching functionality for /batch-implement --issues flag.
|
|
Uses gh CLI for GitHub operations with comprehensive security validation.
|
|
|
|
Security Features:
|
|
- CWE-20: Input validation (positive integers, max 100 issues)
|
|
- CWE-78: Command injection prevention (subprocess list args, shell=False)
|
|
- CWE-117: Log injection prevention (sanitize newlines, control characters)
|
|
- Audit logging for all gh CLI operations
|
|
|
|
Key Functions:
|
|
1. validate_issue_numbers() - Validate issue numbers before subprocess calls
|
|
2. fetch_issue_title() - Fetch single issue title via gh CLI
|
|
3. fetch_issue_titles() - Batch fetch multiple issue titles
|
|
4. format_feature_description() - Format issue as feature description
|
|
|
|
Workflow:
|
|
1. Parse --issues argument: "72,73,74" → [72, 73, 74]
|
|
2. Validate issue numbers (validate_issue_numbers)
|
|
3. Fetch titles from GitHub (fetch_issue_titles)
|
|
4. Format as features (format_feature_description)
|
|
5. Create batch state with issue_numbers
|
|
|
|
Usage:
|
|
from github_issue_fetcher import (
|
|
validate_issue_numbers,
|
|
fetch_issue_titles,
|
|
format_feature_description,
|
|
)
|
|
|
|
# Parse issue numbers
|
|
issue_numbers = [72, 73, 74]
|
|
|
|
# Validate
|
|
validate_issue_numbers(issue_numbers)
|
|
|
|
# Fetch titles
|
|
issue_titles = fetch_issue_titles(issue_numbers)
|
|
# Returns: {72: "Add logging", 73: "Fix bug"}
|
|
|
|
# Format as features
|
|
features = [
|
|
format_feature_description(num, title)
|
|
for num, title in issue_titles.items()
|
|
]
|
|
# Returns: ["Issue #72: Add logging", "Issue #73: Fix bug"]
|
|
|
|
Date: 2025-11-16
|
|
Issue: #77 (Add --issues flag to /batch-implement)
|
|
Agent: implementer
|
|
Phase: TDD Green (making tests pass)
|
|
|
|
See error-handling-patterns skill for exception hierarchy and error handling best practices.
|
|
|
|
|
|
Design Patterns:
|
|
See library-design-patterns skill for standardized design patterns.
|
|
See api-integration-patterns skill for standardized design patterns.
|
|
"""
|
|
|
|
import json
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import List, Dict, Optional
|
|
from subprocess import TimeoutExpired
|
|
|
|
# Import security utilities for audit logging
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
from security_utils import audit_log
|
|
|
|
|
|
# =============================================================================
|
|
# EXCEPTIONS
|
|
# =============================================================================
|
|
|
|
|
|
class GitHubAPIError(Exception):
|
|
"""Base exception for GitHub API errors."""
|
|
pass
|
|
|
|
|
|
class IssueNotFoundError(GitHubAPIError):
|
|
"""Exception raised when GitHub issue is not found."""
|
|
pass
|
|
|
|
|
|
# =============================================================================
|
|
# CONSTANTS
|
|
# =============================================================================
|
|
|
|
|
|
# Maximum issues per batch (prevent resource exhaustion)
|
|
MAX_ISSUES_PER_BATCH = 100
|
|
|
|
# Subprocess timeout (seconds)
|
|
GH_CLI_TIMEOUT = 10
|
|
|
|
# Title truncation length (prevent log bloat)
|
|
MAX_TITLE_LENGTH = 200
|
|
|
|
|
|
# =============================================================================
|
|
# INPUT VALIDATION (CWE-20)
|
|
# =============================================================================
|
|
|
|
|
|
def validate_issue_numbers(issue_numbers: List[int]) -> None:
|
|
"""Validate GitHub issue numbers.
|
|
|
|
Security (CWE-20): Input Validation
|
|
- Accept only positive integers (>0)
|
|
- Reject zero, negative numbers
|
|
- Enforce maximum limit (100 issues per batch)
|
|
- Prevent resource exhaustion attacks
|
|
|
|
Args:
|
|
issue_numbers: List of GitHub issue numbers
|
|
|
|
Raises:
|
|
ValueError: If validation fails with helpful message
|
|
|
|
Examples:
|
|
>>> validate_issue_numbers([72, 73, 74]) # Valid
|
|
>>> validate_issue_numbers([]) # Raises ValueError
|
|
ValueError: Issue numbers list cannot be empty
|
|
>>> validate_issue_numbers([-1]) # Raises ValueError
|
|
ValueError: Invalid issue number: -1. Issue numbers must be positive integers.
|
|
>>> validate_issue_numbers([0]) # Raises ValueError
|
|
ValueError: Invalid issue number: 0. Issue numbers must be positive integers.
|
|
>>> validate_issue_numbers(list(range(1, 102))) # Raises ValueError
|
|
ValueError: Too many issues: 101. Maximum allowed is 100 issues per batch.
|
|
|
|
Security Notes:
|
|
- This function MUST be called BEFORE any subprocess calls
|
|
- Prevents command injection via invalid issue numbers
|
|
- Prevents resource exhaustion via batch size limits
|
|
"""
|
|
# Check for empty list
|
|
if not issue_numbers:
|
|
raise ValueError(
|
|
"Issue numbers list cannot be empty. "
|
|
"Provide at least one issue number."
|
|
)
|
|
|
|
# Check maximum batch size
|
|
if len(issue_numbers) > MAX_ISSUES_PER_BATCH:
|
|
raise ValueError(
|
|
f"Too many issues: {len(issue_numbers)}. "
|
|
f"Maximum allowed is {MAX_ISSUES_PER_BATCH} issues per batch. "
|
|
f"Consider splitting into multiple batches."
|
|
)
|
|
|
|
# Validate each issue number
|
|
for num in issue_numbers:
|
|
if not isinstance(num, int) or num <= 0:
|
|
raise ValueError(
|
|
f"Invalid issue number: {num}. "
|
|
f"Issue numbers must be positive integers (>0)."
|
|
)
|
|
|
|
# Audit log successful validation
|
|
audit_log("github_issue_validation", "success", {
|
|
"operation": "validate_issue_numbers",
|
|
"count": len(issue_numbers),
|
|
"issue_numbers": issue_numbers[:10], # Log first 10 for audit trail
|
|
})
|
|
|
|
|
|
# =============================================================================
|
|
# SINGLE ISSUE FETCHING (CWE-78)
|
|
# =============================================================================
|
|
|
|
|
|
def fetch_issue_title(issue_number: int) -> Optional[str]:
|
|
"""Fetch single GitHub issue title via gh CLI.
|
|
|
|
Security (CWE-78): Command Injection Prevention
|
|
- Use subprocess.run() with LIST arguments (not string)
|
|
- shell=False (CRITICAL security requirement)
|
|
- 10-second timeout to prevent hung processes
|
|
- Audit log all gh CLI operations
|
|
|
|
Args:
|
|
issue_number: GitHub issue number
|
|
|
|
Returns:
|
|
Issue title if exists, None if not found (404)
|
|
|
|
Raises:
|
|
FileNotFoundError: If gh CLI is not installed
|
|
TimeoutExpired: If gh CLI hangs (>10 seconds)
|
|
OSError: If network or system errors occur
|
|
|
|
Examples:
|
|
>>> fetch_issue_title(72) # Existing issue
|
|
'Add logging feature'
|
|
>>> fetch_issue_title(9999) # Non-existent issue
|
|
None
|
|
>>> fetch_issue_title(72) # gh CLI not installed
|
|
FileNotFoundError: gh CLI not found. Install from: https://cli.github.com
|
|
|
|
Security Notes:
|
|
- CRITICAL: Uses subprocess.run() with list args (prevents command injection)
|
|
- CRITICAL: shell=False prevents shell metacharacter attacks
|
|
- Validates subprocess command construction in tests
|
|
- All operations are audit logged
|
|
"""
|
|
try:
|
|
# SECURITY CRITICAL: Use list arguments, shell=False
|
|
# This prevents command injection via issue_number
|
|
result = subprocess.run(
|
|
['gh', 'issue', 'view', str(issue_number), '--json', 'title'],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=GH_CLI_TIMEOUT,
|
|
shell=False, # CRITICAL: Never use shell=True
|
|
)
|
|
|
|
# Check return code
|
|
if result.returncode != 0:
|
|
# Check for common errors
|
|
stderr_lower = result.stderr.lower()
|
|
|
|
# Issue not found (404)
|
|
if 'no pull requests or issues found' in stderr_lower or 'not found' in stderr_lower:
|
|
audit_log(f"Issue #{issue_number} not found (404)", "not_found", {
|
|
"operation": "fetch_issue_title",
|
|
"issue_number": issue_number,
|
|
"error": "Issue not found (404)",
|
|
})
|
|
return None
|
|
|
|
# Authentication error
|
|
if 'authentication' in stderr_lower or 'unauthorized' in stderr_lower:
|
|
audit_log("github_issue_fetch", "error", {
|
|
"operation": "fetch_issue_title",
|
|
"issue_number": issue_number,
|
|
"error": "Authentication required",
|
|
})
|
|
# Graceful degradation - return None instead of raising
|
|
return None
|
|
|
|
# Rate limit
|
|
if 'rate limit' in stderr_lower:
|
|
audit_log("github_issue_fetch", "error", {
|
|
"operation": "fetch_issue_title",
|
|
"issue_number": issue_number,
|
|
"error": "API rate limit exceeded",
|
|
})
|
|
# Graceful degradation - return None instead of raising
|
|
return None
|
|
|
|
# Other errors - graceful degradation
|
|
audit_log("github_issue_fetch", "error", {
|
|
"operation": "fetch_issue_title",
|
|
"issue_number": issue_number,
|
|
"error": result.stderr[:200], # Log first 200 chars
|
|
})
|
|
return None
|
|
|
|
# Parse JSON response
|
|
try:
|
|
data = json.loads(result.stdout)
|
|
title = data.get('title', '')
|
|
|
|
# Audit log success
|
|
audit_log(f"Successfully fetched issue #{issue_number}", "success", {
|
|
"operation": "fetch_issue_title",
|
|
"issue_number": issue_number,
|
|
"title_length": len(title),
|
|
})
|
|
|
|
return title
|
|
|
|
except json.JSONDecodeError as e:
|
|
# Graceful degradation on JSON parse error
|
|
audit_log("github_issue_fetch", "error", {
|
|
"operation": "fetch_issue_title",
|
|
"issue_number": issue_number,
|
|
"error": f"JSON parse error: {e}",
|
|
})
|
|
return None
|
|
|
|
except FileNotFoundError:
|
|
# gh CLI not installed
|
|
audit_log("github_issue_fetch", "error", {
|
|
"operation": "fetch_issue_title",
|
|
"issue_number": issue_number,
|
|
"error": "gh CLI not found",
|
|
})
|
|
raise FileNotFoundError(
|
|
"gh CLI not found. Install from: https://cli.github.com\n"
|
|
"After installing, authenticate with: gh auth login"
|
|
)
|
|
|
|
except TimeoutExpired:
|
|
# gh CLI timeout
|
|
audit_log("github_issue_fetch", "error", {
|
|
"operation": "fetch_issue_title",
|
|
"issue_number": issue_number,
|
|
"error": f"Timeout after {GH_CLI_TIMEOUT} seconds",
|
|
})
|
|
raise
|
|
|
|
except OSError as e:
|
|
# Network or system error
|
|
audit_log("github_issue_fetch", "error", {
|
|
"operation": "fetch_issue_title",
|
|
"issue_number": issue_number,
|
|
"error": str(e),
|
|
})
|
|
raise
|
|
|
|
|
|
# =============================================================================
|
|
# BATCH ISSUE FETCHING
|
|
# =============================================================================
|
|
|
|
|
|
def fetch_issue_titles(issue_numbers: List[int]) -> Dict[int, str]:
|
|
"""Batch fetch multiple GitHub issue titles.
|
|
|
|
Features:
|
|
- Call fetch_issue_title() for each issue
|
|
- Graceful degradation: skip missing issues (return None)
|
|
- Audit log batch operations
|
|
- Raise ValueError if ALL issues missing
|
|
|
|
Args:
|
|
issue_numbers: List of GitHub issue numbers
|
|
|
|
Returns:
|
|
Dict mapping issue_number → title (only successful fetches)
|
|
|
|
Raises:
|
|
ValueError: If ALL issues are missing or failed to fetch
|
|
FileNotFoundError: If gh CLI is not installed
|
|
TimeoutExpired: If gh CLI hangs
|
|
|
|
Examples:
|
|
>>> fetch_issue_titles([72, 73, 74])
|
|
{72: 'Add logging', 73: 'Fix bug', 74: 'Update docs'}
|
|
>>> fetch_issue_titles([72, 9999, 74]) # 9999 doesn't exist
|
|
{72: 'Add logging', 74: 'Update docs'}
|
|
>>> fetch_issue_titles([9998, 9999]) # All missing
|
|
ValueError: No issues found. All issue numbers are invalid or don't exist: [9998, 9999]
|
|
|
|
Security Notes:
|
|
- Input validation should be done BEFORE calling this function
|
|
- All gh CLI operations are audit logged
|
|
- Graceful degradation on missing issues
|
|
"""
|
|
# Audit log batch start
|
|
audit_log("github_issue_fetch_batch start", "info", {
|
|
"operation": "fetch_issue_titles",
|
|
"count": len(issue_numbers),
|
|
"issue_numbers": issue_numbers[:10], # Log first 10
|
|
})
|
|
|
|
results = {}
|
|
missing_issues = []
|
|
|
|
# Fetch each issue
|
|
for num in issue_numbers:
|
|
title = fetch_issue_title(num)
|
|
|
|
if title is not None:
|
|
results[num] = title
|
|
# Log successful fetch in batch context
|
|
audit_log(f"Batch: fetched issue #{num}", "info", {
|
|
"operation": "fetch_issue_titles_item",
|
|
"issue_number": num,
|
|
})
|
|
else:
|
|
missing_issues.append(num)
|
|
|
|
# Check if ALL issues failed
|
|
if not results:
|
|
audit_log("github_issue_fetch_batch", "error", {
|
|
"operation": "fetch_issue_titles",
|
|
"error": "All issues failed to fetch",
|
|
"missing_issues": missing_issues,
|
|
})
|
|
raise ValueError(
|
|
f"No issues found. All issue numbers are invalid or don't exist: {missing_issues}\n"
|
|
f"Please verify the issue numbers and try again."
|
|
)
|
|
|
|
# Log warnings for missing issues
|
|
if missing_issues:
|
|
audit_log("github_issue_fetch_batch", "warning", {
|
|
"operation": "fetch_issue_titles",
|
|
"successful": len(results),
|
|
"missing": len(missing_issues),
|
|
"missing_issues": missing_issues,
|
|
})
|
|
|
|
# Audit log batch completion
|
|
audit_log("github_issue_fetch_batch complete", "info", {
|
|
"operation": "fetch_issue_titles",
|
|
"successful": len(results),
|
|
"total": len(issue_numbers),
|
|
})
|
|
|
|
return results
|
|
|
|
|
|
# =============================================================================
|
|
# OUTPUT FORMATTING (CWE-117)
|
|
# =============================================================================
|
|
|
|
|
|
def format_feature_description(issue_number: int, title: str) -> str:
|
|
"""Format issue as feature description.
|
|
|
|
Security (CWE-117): Log Injection Prevention
|
|
- Sanitize newlines (\n, \r)
|
|
- Remove control characters (\t, \x00, \x1b)
|
|
- Truncate long titles (>200 chars → "...")
|
|
- Handle empty/whitespace-only titles
|
|
|
|
Args:
|
|
issue_number: GitHub issue number
|
|
title: Issue title (may contain malicious characters)
|
|
|
|
Returns:
|
|
Formatted feature description: "Issue #72: Add logging feature"
|
|
|
|
Examples:
|
|
>>> format_feature_description(72, "Add logging feature")
|
|
'Issue #72: Add logging feature'
|
|
>>> format_feature_description(72, "Title\\nINJECTED\\nLOG")
|
|
'Issue #72: Title INJECTED LOG'
|
|
>>> format_feature_description(72, "")
|
|
'Issue #72: (no title)'
|
|
>>> format_feature_description(72, "A" * 500)
|
|
'Issue #72: AAAA...AAA...'
|
|
|
|
Security Notes:
|
|
- CRITICAL: Sanitizes newlines to prevent log injection (CWE-117)
|
|
- Removes control characters (\t, \x00, \x1b, etc.)
|
|
- Truncates long titles to prevent log bloat
|
|
- All malicious characters are replaced or removed
|
|
"""
|
|
# Strip whitespace
|
|
title = title.strip()
|
|
|
|
# Handle empty titles
|
|
if not title:
|
|
return f"Issue #{issue_number}: (no title)"
|
|
|
|
# SECURITY: Remove newlines (CWE-117 prevention)
|
|
# Replace \n and \r with spaces
|
|
title = title.replace('\n', ' ').replace('\r', ' ')
|
|
|
|
# SECURITY: Remove control characters
|
|
# Keep only printable characters (ASCII 32-126) and space
|
|
sanitized = []
|
|
for char in title:
|
|
char_code = ord(char)
|
|
if char_code >= 32 and char_code <= 126:
|
|
sanitized.append(char)
|
|
elif char == ' ':
|
|
sanitized.append(char)
|
|
# Skip all other control characters (\t, \x00, \x1b, etc.)
|
|
|
|
title = ''.join(sanitized)
|
|
|
|
# Collapse multiple spaces
|
|
title = ' '.join(title.split())
|
|
|
|
# Handle whitespace-only after sanitization
|
|
if not title:
|
|
return f"Issue #{issue_number}: (no title)"
|
|
|
|
# SECURITY: Truncate long titles (prevent log bloat)
|
|
if len(title) > MAX_TITLE_LENGTH:
|
|
title = title[:MAX_TITLE_LENGTH] + "..."
|
|
|
|
return f"Issue #{issue_number}: {title}"
|