TradingAgents/.claude/lib/batch_state_manager.py

1591 lines
55 KiB
Python

#!/usr/bin/env python3
"""
Batch State Manager - State-based tracking for /batch-implement command.
Manages persistent state for batch feature processing. Enables crash recovery,
resume functionality, and multi-feature batch processing.
DESIGN (v3.34.0): Compaction-resilient - all state is externalized (batch_state.json,
git commits, GitHub issues). Batches survive Claude Code's auto-compaction because
each feature bootstraps fresh from external state, not conversation memory.
NOTE: Context clearing functions (should_clear_context, pause_batch_for_clear,
get_clear_notification_message) are DEPRECATED. Kept for backward compatibility only.
Key Features:
1. Persistent state storage (.claude/batch_state.json)
2. Progress tracking (completed, failed, current feature)
3. Atomic writes with file locking
4. Security validations (CWE-22 path traversal, CWE-59 symlinks)
5. Crash recovery and resume
State Structure:
{
"batch_id": "batch-20251116-123456",
"features_file": "/path/to/features.txt",
"total_features": 10,
"features": ["feature 1", "feature 2", ...],
"current_index": 3,
"completed_features": [0, 1, 2],
"failed_features": [
{"feature_index": 5, "error_message": "Tests failed", "timestamp": "..."}
],
"context_token_estimate": 145000,
"auto_clear_count": 2,
"auto_clear_events": [
{"feature_index": 2, "tokens_before": 155000, "timestamp": "..."},
{"feature_index": 5, "tokens_before": 152000, "timestamp": "..."}
],
"created_at": "2025-11-16T10:00:00Z",
"updated_at": "2025-11-16T14:30:00Z",
"status": "in_progress" # in_progress, completed, failed
}
Workflow:
1. /batch-implement reads features.txt
2. create_batch_state() creates initial state
3. For each feature:
a. Process with /auto-implement
b. update_batch_progress() increments current_index
c. should_auto_clear() checks if threshold exceeded
d. If yes: record_auto_clear_event() → /clear → resume
4. cleanup_batch_state() removes state file on completion
Usage:
from batch_state_manager import (
create_batch_state,
load_batch_state,
save_batch_state,
update_batch_progress,
record_auto_clear_event,
should_auto_clear,
get_next_pending_feature,
cleanup_batch_state,
)
from path_utils import get_batch_state_file
# Create new batch
state = create_batch_state("/path/to/features.txt", ["feature 1", "feature 2"])
save_batch_state(get_batch_state_file(), state)
# Process features
while True:
next_feature = get_next_pending_feature(state)
if next_feature is None:
break
# Process feature...
# Update progress
update_batch_progress(get_batch_state_file(), state.current_index, "completed", 10000)
# Check auto-clear
state = load_batch_state(get_batch_state_file())
if should_auto_clear(state):
record_auto_clear_event(get_batch_state_file(), state.current_index, state.context_token_estimate)
# /clear command...
state = load_batch_state(get_batch_state_file())
# Cleanup
cleanup_batch_state(get_batch_state_file())
Date: 2025-11-16
Issue: #76 (State-based Auto-Clearing for /batch-implement)
Agent: implementer
Phase: TDD Green (making tests pass)
See error-handling-patterns skill for exception hierarchy and error handling best practices.
Design Patterns:
See library-design-patterns skill for standardized design patterns.
See state-management-patterns skill for standardized design patterns.
"""
import json
import os
import tempfile
import threading
import warnings
from functools import wraps
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional
# Import security utilities for path validation
import sys
sys.path.insert(0, str(Path(__file__).parent))
from security_utils import validate_path, audit_log
from path_utils import get_batch_state_file
# Import sanitization functions
try:
from failure_classifier import sanitize_feature_name
except ImportError:
# Fallback for tests
def sanitize_feature_name(name: str) -> str:
"""Fallback sanitization."""
return name.replace("\n", " ").replace("\r", " ")
# =============================================================================
# Decorators
# =============================================================================
def deprecated(func):
    """Decorator that flags *func* as deprecated.

    Every call to the wrapped callable emits a ``DeprecationWarning`` before
    delegating to the original implementation. Applied to the legacy
    context-clearing helpers that remain only for backward compatibility.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper that warns, then forwards all arguments to ``func``.
    """
    message = (
        f"{func.__name__} is deprecated but still functional. "
        "Hybrid pause/resume workflow still uses these functions."
    )

    @wraps(func)
    def _warn_and_call(*args, **kwargs):
        # stacklevel=2 points the warning at the caller, not this wrapper.
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return func(*args, **kwargs)

    return _warn_and_call
# =============================================================================
# Constants
# =============================================================================
# Default state file location (dynamically resolved from PROJECT_ROOT - Issue #79).
# This fixes the old hardcoded Path(".claude/batch_state.json"), which broke when
# the command was run from a subdirectory.
# WARNING: This evaluates once at module import time. For testing with mock
# project roots, use get_default_state_file() instead (evaluates lazily).
try:
    DEFAULT_STATE_FILE = get_batch_state_file()
except FileNotFoundError:
    # Fallback for edge cases (e.g., running outside a git repo, where the
    # project root cannot be resolved). The relative path preserves the
    # pre-Issue-#79 behavior for backward compatibility.
    DEFAULT_STATE_FILE = Path(".claude/batch_state.json")
def get_default_state_file():
    """Resolve the default batch-state file path lazily.

    Unlike the module-level ``DEFAULT_STATE_FILE`` constant (evaluated once at
    import time), this helper re-resolves the project root on every call,
    which is what test suites using mock project roots need. Production code
    can keep using the constant for speed.

    Returns:
        Path to PROJECT_ROOT/.claude/batch_state.json, or the relative
        ``.claude/batch_state.json`` fallback when no project root can be
        found (e.g. running outside a git repo).
    """
    try:
        resolved = get_batch_state_file()
    except FileNotFoundError:
        # No project root detected - fall back to the legacy relative path
        # for backward compatibility.
        resolved = Path(".claude/batch_state.json")
    return resolved
# Context token threshold (DEPRECATED - v3.34.0).
# No longer used for auto-clearing: the compaction-resilient design survives
# auto-compaction via externalized state. Kept only for the deprecated
# should_clear_context()/should_auto_clear() checks.
# NOTE(review): 150K presumably leaves headroom below Claude Code's 200K
# context budget - confirm rationale before changing.
CONTEXT_THRESHOLD = 150000
# File lock timeout (seconds).
# NOTE(review): not referenced anywhere in this module's visible code -
# presumably consumed by external callers; verify before removing.
LOCK_TIMEOUT = 30
# =============================================================================
# Exceptions
# =============================================================================
class BatchStateError(Exception):
    """Raised when a batch state operation (create/load/save/update) fails."""
# =============================================================================
# Data Classes
# =============================================================================
@dataclass
class BatchState:
    """Batch processing state, serialized to/from batch_state.json.

    Field order matters: positional construction and JSON round-tripping via
    ``BatchState(**data)`` both depend on it.

    Attributes:
        batch_id: Unique batch identifier
        features_file: Path to features file (metadata only)
        total_features: Total number of features in batch
        features: List of feature descriptions
        current_index: Index of current feature being processed
        completed_features: List of completed feature indices
        failed_features: List of failed feature records
        context_token_estimate: Estimated context token count
        auto_clear_count: Number of auto-clear events
        auto_clear_events: List of auto-clear event records
        created_at: ISO 8601 timestamp of batch creation
        updated_at: ISO 8601 timestamp of last update
        status: Batch status (in_progress/running, paused, completed, failed)
        issue_numbers: Optional list of GitHub issue numbers (for --issues flag)
        source_type: Source type ("file" or "issues")
        state_file: Path to state file
        context_tokens_before_clear: Token count before clear (paused batches, deprecated)
        paused_at_feature_index: Feature index where batch was paused (deprecated)
        retry_attempts: Dict mapping feature index to retry count (Issue #89)
        git_operations: Dict mapping feature index to git operation results (Issue #93)
            Structure: {feature_index: {operation_type: {success, sha, branch, ...}}}
            Example: {0: {"commit": {"success": True, "sha": "abc123", "branch": "feature/test"}}}
        feature_order: Optimized execution order of feature indices (Issue #157)
        feature_dependencies: Dependency graph mapping feature index to
            prerequisite indices (Issue #157)
        analysis_metadata: Analysis info such as stats and timing (Issue #157)
        workflow_mode: "auto-implement" or "direct" - tells Claude HOW to
            process features (compaction-resilience)
        workflow_reminder: Methodology reminder re-injected after compaction
    """
    batch_id: str
    features_file: str
    total_features: int
    features: List[str]
    current_index: int = 0
    completed_features: List[int] = field(default_factory=list)
    failed_features: List[Dict[str, Any]] = field(default_factory=list)
    context_token_estimate: int = 0
    auto_clear_count: int = 0
    auto_clear_events: List[Dict[str, Any]] = field(default_factory=list)
    created_at: str = ""
    updated_at: str = ""
    status: str = "in_progress"
    issue_numbers: Optional[List[int]] = None
    source_type: str = "file"
    state_file: str = ""
    context_tokens_before_clear: Optional[int] = None
    paused_at_feature_index: Optional[int] = None
    retry_attempts: Dict[int, int] = field(default_factory=dict)  # Issue #89: Track retry counts per feature
    git_operations: Dict[int, Dict[str, Any]] = field(default_factory=dict)  # Issue #93: Track git operations per feature
    feature_order: List[int] = field(default_factory=list)  # Issue #157: Optimized execution order
    feature_dependencies: Dict[int, List[int]] = field(default_factory=dict)  # Issue #157: Dependency graph
    analysis_metadata: Dict[str, Any] = field(default_factory=dict)  # Issue #157: Analysis info (stats, timing, etc.)
    # Compaction-resilience: Workflow methodology survives context summarization
    workflow_mode: str = "auto-implement"  # "auto-implement" or "direct" - tells Claude HOW to process features
    workflow_reminder: str = "Use /auto-implement for each feature. Do NOT implement directly."  # Reinjects methodology after compaction

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization (recursive via asdict)."""
        return asdict(self)
# Per-file reentrant lock registry: one RLock per resolved state-file path,
# keyed by the absolute path string. _locks_lock guards registry insertion so
# concurrent threads never create two locks for the same file.
_file_locks: Dict[str, threading.RLock] = {}
_locks_lock = threading.Lock()
def audit_log_security_event(event_type: str, details: Dict[str, Any]) -> None:
    """Record a security-related event in the audit log.

    Thin convenience wrapper around ``security_utils.audit_log`` that pins the
    middle (status) argument to ``"security"``.

    Args:
        event_type: Category of the security event being recorded.
        details: Structured payload describing the event.
    """
    audit_log(event_type, "security", details)
def _get_file_lock(file_path: Path) -> threading.RLock:
    """Return the per-file reentrant lock, creating it on first use.

    Locks are keyed by the fully resolved path so different spellings of the
    same file share one lock. The lock is reentrant, which lets a thread that
    already holds it call helpers (load/save) that acquire it again.

    Args:
        file_path: Path whose lock should be fetched.

    Returns:
        The ``threading.RLock`` associated with *file_path*.
    """
    key = str(file_path.resolve())
    with _locks_lock:
        # setdefault inserts a fresh RLock only when the key is absent.
        return _file_locks.setdefault(key, threading.RLock())
# =============================================================================
# State Creation
# =============================================================================
def create_batch_state(
    features_file_or_features: Optional[str | List[str]] = None,
    features_or_none: Optional[List[str]] = None,
    issue_numbers: Optional[List[int]] = None,
    source_type: str = "file",
    state_file: Optional[str] = None,
    *,
    features: Optional[List[str]] = None,  # Keyword-only for new calling style
    features_file: Optional[str] = None,  # Keyword-only for explicit features_file
    batch_id: Optional[str] = None,  # Optional custom batch ID
) -> BatchState:
    """Create new batch state.

    Supports two calling styles for backward compatibility:
    1. Old style (positional): create_batch_state(features_file, features)
    2. New style (keyword): create_batch_state(features=..., state_file=..., issue_numbers=...)

    Args:
        features_file_or_features: Features file path (old style) OR features list (new style detection)
        features_or_none: Features list (old style) or None (new style)
        issue_numbers: Optional list of GitHub issue numbers (for --issues flag)
        source_type: Source type ("file" or "issues")
        state_file: Optional path to state file
        features: Features list (keyword-only, for new calling style)
        features_file: Explicit features file path (keyword-only, new style)
        batch_id: Optional custom batch ID (keyword-only)

    Returns:
        Newly created BatchState with status "in_progress" and no progress yet.

    Raises:
        BatchStateError: If the features list is empty, the arguments are
            ambiguous, or features_file/batch_id contain path traversal.

    Examples:
        Old style (backward compatible):
            >>> state = create_batch_state("/path/to/features.txt", ["feature 1", "feature 2"])
            >>> state.source_type
            'file'
        New style (--issues flag):
            >>> state = create_batch_state(
            ...     features=["Issue #72: Add logging"],
            ...     issue_numbers=[72],
            ...     source_type="issues",
            ...     state_file="/path/to/state.json"
            ... )
            >>> state.issue_numbers
            [72]
    """
    # --- Detect calling style ---
    if features is not None:
        # New style: features passed as a keyword argument.
        features_list = features
        # Use explicit features_file keyword if provided, otherwise empty.
        features_file_path = features_file if features_file is not None else ""
    elif features_file_or_features is None and features_or_none is None:
        # Neither positional argument provided - must use keyword 'features'.
        raise BatchStateError(
            "Invalid arguments. Use either:\n"
            " create_batch_state(features_file, features) # Old style\n"
            " create_batch_state(features=..., state_file=..., issue_numbers=...) # New style"
        )
    elif isinstance(features_file_or_features, list):
        # First positional arg is a list: treat as new style without keyword.
        if features_or_none is None:
            features_list = features_file_or_features
            features_file_path = ""
        else:
            # Very unlikely case: both positionals are lists.
            raise BatchStateError("Ambiguous arguments: both features_file and features appear to be lists")
    elif isinstance(features_file_or_features, str) and features_or_none is not None:
        # Old style: create_batch_state(features_file, features)
        features_file_path = features_file_or_features
        features_list = features_or_none
    else:
        raise BatchStateError(
            "Invalid arguments. Use either:\n"
            " create_batch_state(features_file, features) # Old style\n"
            " create_batch_state(features=..., state_file=..., issue_numbers=...) # New style"
        )
    if not features_list:
        raise BatchStateError("Cannot create batch state with no features")
    # Sanitize feature names (CWE-117 log injection, CWE-22 path traversal).
    sanitized_features = [sanitize_feature_name(f) for f in features_list]
    # Validate features_file path (security). The ".." check alone covers all
    # traversal forms; the former startswith("/tmp/../../") clause was dead
    # code because any such path already contains "..". Note: features_file is
    # just metadata and is never opened here.
    if features_file_path and ".." in features_file_path:
        raise BatchStateError("Invalid features file path: path traversal detected")
    # Validate batch_id for path traversal (CWE-22): it must be a simple
    # identifier with no "..", "/" or "\" components.
    if batch_id and (".." in batch_id or "/" in batch_id or "\\" in batch_id):
        raise BatchStateError(
            "Invalid batch_id: contains path traversal or directory separators. "
            "batch_id must be a simple identifier without path components."
        )
    # Generate a unique batch ID (microsecond precision avoids collisions)
    # unless the caller supplied one.
    if not batch_id:
        timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S-%f")
        batch_id = f"batch-{timestamp}"
    # Create timestamps (naive UTC + "Z" suffix, matching the rest of the module).
    now = datetime.utcnow().isoformat() + "Z"
    return BatchState(
        batch_id=batch_id,
        features_file=features_file_path,
        total_features=len(sanitized_features),
        features=sanitized_features,
        current_index=0,
        completed_features=[],
        failed_features=[],
        context_token_estimate=0,
        auto_clear_count=0,
        auto_clear_events=[],
        created_at=now,
        updated_at=now,
        status="in_progress",
        issue_numbers=issue_numbers,
        source_type=source_type,
        state_file=state_file or "",
        context_tokens_before_clear=None,
        paused_at_feature_index=None,
    )
# =============================================================================
# State Persistence
# =============================================================================
def save_batch_state(state_file: Path | str, state: BatchState) -> None:
    """Save batch state to JSON file (atomic write).

    Uses the temp-file + rename pattern so readers never observe a partially
    written file, and sets permissions to 0o600 (owner read/write only).
    Refreshes ``state.updated_at`` as a side effect.

    Args:
        state_file: Path to state file (relative paths resolve from PROJECT_ROOT)
        state: Batch state to save

    Raises:
        BatchStateError: If path validation fails or the write fails

    Security:
        - Validates path with security_utils.validate_path()
        - Rejects symlinks (CWE-59), prevents path traversal (CWE-22)
        - Atomic write (temp file + rename), file permissions 0o600
        - Audit logging of success and failure

    Atomic Write Design:
        1. CREATE: tempfile.mkstemp() creates a .tmp file in the same directory
        2. WRITE: JSON data written to the .tmp file
        3. RENAME: temp_path.replace(target) atomically swaps the file in

    Failure Scenarios:
        - Crash during write: temp file left behind, target unchanged
        - Crash during rename: atomic, so target is old or new (never partial)
        - Concurrent writes: each gets a unique temp file (last write wins)

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = create_batch_state("/path/to/features.txt", ["feature 1"])
        >>> save_batch_state(get_batch_state_file(), state)
    """
    state_file = Path(state_file)
    # Resolve relative paths from PROJECT_ROOT (Issue #79) so that
    # "custom/state.json" becomes PROJECT_ROOT/custom/state.json.
    if not state_file.is_absolute():
        from path_utils import get_project_root
        try:
            project_root = get_project_root(use_cache=False)
            state_file = project_root / state_file
        except FileNotFoundError:
            # No project root: keep the cwd-relative path (backward compatibility).
            pass
    # Validate path (security).
    try:
        state_file = validate_path(state_file, "batch state file", allow_missing=True)
    except ValueError as e:
        audit_log("batch_state_save", "error", {
            "error": str(e),
            "path": str(state_file),
        })
        raise BatchStateError(str(e))
    # Refresh timestamp so the on-disk copy records the time of the last write.
    state.updated_at = datetime.utcnow().isoformat() + "Z"
    # Serialize writers to this file (reentrant per-file lock).
    lock = _get_file_lock(state_file)
    with lock:
        try:
            # Ensure parent directory exists.
            state_file.parent.mkdir(parents=True, exist_ok=True)
            # Atomic write: temp file in the same directory, then rename.
            temp_fd, temp_path_str = tempfile.mkstemp(
                dir=state_file.parent,
                prefix=".batch_state_",
                suffix=".tmp"
            )
            temp_path = Path(temp_path_str)
            try:
                # Write JSON to the temp file.
                json_data = json.dumps(state.to_dict(), indent=2)
                os.write(temp_fd, json_data.encode('utf-8'))
                os.close(temp_fd)
                # Owner read/write only, set before the file becomes visible.
                temp_path.chmod(0o600)
                # Atomic rename into place.
                temp_path.replace(state_file)
                audit_log("batch_state_save", "success", {
                    "batch_id": state.batch_id,
                    "path": str(state_file),
                    "features_count": state.total_features,
                })
            except Exception:
                # Best-effort cleanup of the temp file; the narrow OSError
                # excepts replace former bare 'except:' clauses so signals
                # like KeyboardInterrupt are never swallowed here.
                try:
                    os.close(temp_fd)
                except OSError:
                    pass  # fd already closed on the success path above
                try:
                    temp_path.unlink()
                except OSError:
                    pass  # temp file already renamed away or never created
                raise
        except OSError as e:
            audit_log("batch_state_save", "error", {
                "error": str(e),
                "path": str(state_file),
            })
            # Provide more specific error messages.
            error_msg = str(e).lower()
            if "space" in error_msg or "disk full" in error_msg:
                raise BatchStateError(f"Disk space error while saving batch state: {e}")
            elif "permission" in error_msg:
                raise BatchStateError(f"Permission error while saving batch state: {e}")
            else:
                raise BatchStateError(f"Failed to save batch state: {e}")
def load_batch_state(state_file: Path | str) -> BatchState:
    """Load batch state from JSON file.

    Args:
        state_file: Path to state file (relative paths resolve from PROJECT_ROOT)

    Returns:
        Loaded BatchState, with defaults filled in for any fields added after
        the file was written (backward compatibility with older state files).

    Raises:
        BatchStateError: If path validation fails, the file is missing or
            corrupted, or required fields are absent

    Security:
        - Validates path with security_utils.validate_path()
        - Rejects symlinks (CWE-59), prevents path traversal (CWE-22)
        - Graceful degradation on corrupted JSON
        - Audit logging

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> state.batch_id
        'batch-20251116-123456'
    """
    state_file = Path(state_file)
    # Resolve relative paths from PROJECT_ROOT (Issue #79) so that
    # "custom/state.json" becomes PROJECT_ROOT/custom/state.json.
    if not state_file.is_absolute():
        from path_utils import get_project_root
        try:
            project_root = get_project_root(use_cache=False)
            state_file = project_root / state_file
        except FileNotFoundError:
            # No project root: keep the cwd-relative path (backward compatibility).
            pass
    # Validate path (security).
    try:
        state_file = validate_path(state_file, "batch state file", allow_missing=False)
    except ValueError as e:
        audit_log("batch_state_load", "error", {
            "error": str(e),
            "path": str(state_file),
        })
        raise BatchStateError(str(e))
    if not state_file.exists():
        raise BatchStateError(f"Batch state file not found: {state_file}")
    # Serialize against concurrent writers (reentrant per-file lock).
    lock = _get_file_lock(state_file)
    with lock:
        try:
            # Read as UTF-8 explicitly: save_batch_state() writes UTF-8 bytes,
            # so relying on the platform default encoding (e.g. cp1252 on
            # Windows) could corrupt non-ASCII feature names.
            with open(state_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Validate required fields.
            required_fields = [
                "batch_id", "features_file", "total_features", "features",
                "current_index", "status"
            ]
            # Loop variable renamed from 'field' to avoid shadowing dataclasses.field.
            missing_fields = [name for name in required_fields if name not in data]
            if missing_fields:
                raise BatchStateError(f"Missing required fields: {missing_fields}")
            # Backward compatibility: default values for newer fields.
            # Old state files (pre-v3.23.0) lack issue_numbers/source_type/state_file (Issue #77).
            if 'issue_numbers' not in data:
                data['issue_numbers'] = None
            if 'source_type' not in data:
                data['source_type'] = 'file'
            if 'state_file' not in data:
                data['state_file'] = str(state_file)
            # Issue #88: deprecated pause fields.
            if 'context_tokens_before_clear' not in data:
                data['context_tokens_before_clear'] = None
            if 'paused_at_feature_index' not in data:
                data['paused_at_feature_index'] = None
            # Issue #89: retry tracking. JSON stringifies int keys; restore them.
            if 'retry_attempts' not in data:
                data['retry_attempts'] = {}
            else:
                data['retry_attempts'] = {int(k): v for k, v in data['retry_attempts'].items()}
            # Issue #93: git operations tracking, same int-key restoration.
            if 'git_operations' not in data:
                data['git_operations'] = {}
            else:
                data['git_operations'] = {int(k): v for k, v in data['git_operations'].items()}
            # Compaction-resilience: workflow_mode and workflow_reminder defaults.
            if 'workflow_mode' not in data:
                data['workflow_mode'] = 'auto-implement'
            if 'workflow_reminder' not in data:
                data['workflow_reminder'] = 'Use /auto-implement for each feature. Do NOT implement directly.'
            # Both 'running' and 'in_progress' are accepted as active states.
            state = BatchState(**data)
            audit_log("batch_state_load", "success", {
                "batch_id": state.batch_id,
                "path": str(state_file),
            })
            return state
        except json.JSONDecodeError as e:
            audit_log("batch_state_load", "error", {
                "error": f"Corrupted JSON: {e}",
                "path": str(state_file),
            })
            raise BatchStateError(f"Corrupted batch state file: {e}")
        except OSError as e:
            audit_log("batch_state_load", "error", {
                "error": str(e),
                "path": str(state_file),
            })
            # Provide more specific error messages.
            error_msg = str(e).lower()
            if "permission" in error_msg:
                raise BatchStateError(f"Permission error while loading batch state: {e}")
            else:
                raise BatchStateError(f"Failed to load batch state: {e}")
# =============================================================================
# State Updates
# =============================================================================
def update_batch_progress(
    state_file: Path | str,
    feature_index: int,
    status: str,
    context_token_delta: int = 0,
    error_message: Optional[str] = None,
    token_delta: Optional[int] = None,  # Backward compatibility alias
) -> None:
    """Update batch progress after processing a feature.

    This function is thread-safe - it uses a reentrant per-file lock to
    serialize the whole load-modify-save sequence, so multiple threads can
    call it simultaneously with different feature indices.

    Args:
        state_file: Path to state file
        feature_index: Index of processed feature
        status: Feature status ("completed" or "failed")
        context_token_delta: Tokens added during feature processing
        error_message: Error message if status is "failed"
        token_delta: Alias for context_token_delta (backward compatibility)

    Raises:
        BatchStateError: If feature_index is out of range, or loading/saving
            the state fails
        ValueError: If status is not "completed" or "failed"

    Example:
        >>> from path_utils import get_batch_state_file
        >>> update_batch_progress(
        ...     state_file=get_batch_state_file(),
        ...     feature_index=0,
        ...     status="completed",
        ...     context_token_delta=5000,
        ... )
    """
    # Backward compatibility: support both parameter names (token_delta wins).
    if token_delta is not None:
        context_token_delta = token_delta
    state_file_path = Path(state_file)
    # Acquire the file lock for an atomic read-modify-write. The lock is
    # reentrant (RLock), so the nested load_batch_state/save_batch_state
    # calls can safely re-acquire it.
    lock = _get_file_lock(state_file_path)
    with lock:
        state = load_batch_state(state_file)
        # Reject out-of-range indices before mutating anything.
        if feature_index < 0 or feature_index >= state.total_features:
            raise BatchStateError(f"Invalid feature index: {feature_index} (total: {state.total_features})")
        if status == "completed":
            # Idempotent: re-recording a completed feature is a no-op.
            if feature_index not in state.completed_features:
                state.completed_features.append(feature_index)
        elif status == "failed":
            failure_record = {
                "feature_index": feature_index,
                "error_message": error_message or "Unknown error",
                "timestamp": datetime.utcnow().isoformat() + "Z",
            }
            state.failed_features.append(failure_record)
        else:
            raise ValueError(f"Invalid status: {status} (must be 'completed' or 'failed')")
        # Running total of estimated context growth.
        state.context_token_estimate += context_token_delta
        # Advance current_index monotonically (max guards against
        # out-of-order concurrent updates).
        state.current_index = max(state.current_index, feature_index + 1)
        # NOTE(review): the batch is marked "completed" once every feature has
        # been processed, even if some features failed - confirm this is the
        # intended terminal state for partially failed batches.
        if state.current_index >= state.total_features:
            state.status = "completed"
        save_batch_state(state_file, state)
def record_auto_clear_event(
    state_file: Path | str,
    feature_index: int,
    context_tokens_before_clear: int,
) -> None:
    """Record auto-clear event in batch state.

    Appends an event record, bumps the auto-clear counter, and resets the
    context token estimate to 0 (the context was just cleared).

    Args:
        state_file: Path to state file
        feature_index: Index of feature that triggered auto-clear
        context_tokens_before_clear: Token count before /clear

    Raises:
        BatchStateError: If loading or saving the state fails

    Example:
        >>> from path_utils import get_batch_state_file
        >>> record_auto_clear_event(
        ...     state_file=get_batch_state_file(),
        ...     feature_index=2,
        ...     context_tokens_before_clear=155000,
        ... )
    """
    # Hold the per-file lock across the whole read-modify-write, matching
    # update_batch_progress(); without it a concurrent updater could
    # interleave between load and save and lose this event. The lock is
    # reentrant, so the nested load/save calls re-acquire it safely.
    lock = _get_file_lock(Path(state_file))
    with lock:
        state = load_batch_state(state_file)
        # Create the auto-clear event record.
        event = {
            "feature_index": feature_index,
            "context_tokens_before_clear": context_tokens_before_clear,
            "timestamp": datetime.utcnow().isoformat() + "Z",
        }
        state.auto_clear_events.append(event)
        state.auto_clear_count += 1
        # Context was just cleared, so the running estimate starts over.
        state.context_token_estimate = 0
        save_batch_state(state_file, state)
    # Audit log (outside the lock; only reads local state).
    audit_log("batch_auto_clear", "success", {
        "batch_id": state.batch_id,
        "feature_index": feature_index,
        "tokens_before": context_tokens_before_clear,
        "clear_count": state.auto_clear_count,
    })
# =============================================================================
# State Queries
# =============================================================================
def should_auto_clear(state: BatchState) -> bool:
    """Report whether the batch's estimated context size warrants a /clear.

    Args:
        state: Current batch state.

    Returns:
        True once ``context_token_estimate`` has reached ``CONTEXT_THRESHOLD``.

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> if should_auto_clear(state):
        ...     pass  # Trigger /clear
    """
    estimated = state.context_token_estimate
    return estimated >= CONTEXT_THRESHOLD
@deprecated
def should_clear_context(state: BatchState) -> bool:
    """Legacy check for whether a manual context clear is due (DEPRECATED).

    Claude Code now manages its 200K-token context automatically, so manual
    clearing is unnecessary; this remains only so existing callers keep
    working. Calling it emits a DeprecationWarning via @deprecated.

    Args:
        state: Current batch state.

    Returns:
        True when the token estimate has reached the 150K threshold (no
        action is actually required anymore).

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> if should_clear_context(state):  # Will emit DeprecationWarning
        ...     pass  # No action needed - Claude Code handles context
    """
    tokens_used = state.context_token_estimate
    return tokens_used >= CONTEXT_THRESHOLD
def estimate_context_tokens(text: str) -> int:
    """Roughly estimate the token count of *text*.

    Uses the conservative heuristic of one token per four characters, so the
    estimate errs toward triggering context management early rather than
    after the real limit is hit.

    Args:
        text: Text whose token footprint should be estimated.

    Returns:
        ``len(text) // 4``, or 0 for empty/falsy input.

    Example:
        >>> estimate_context_tokens("Hello world! " * 100)
        325
    """
    # Empty (or otherwise falsy) input contributes no tokens.
    return len(text) // 4 if text else 0
@deprecated
def get_clear_notification_message(
    batch_id_or_state: str | BatchState,
    feature_index: Optional[int] = None,
    tokens_before_clear: Optional[int] = None,
) -> str:
    """Format user notification message for context clearing (DEPRECATED).

    DEPRECATED: Claude Code manages context automatically with its 200K token
    budget, so no manual clearing is needed. Kept for backward compatibility.

    Builds a multi-line banner instructing the user to run /clear and then
    resume with /batch-implement --resume <batch-id>.

    Args:
        batch_id_or_state: Batch ID (str, old API) or BatchState object (new API)
        feature_index: Current feature index (old API only)
        tokens_before_clear: Token count before clear (old API only)

    Returns:
        Formatted notification message (multi-line, readable)

    Example:
        >>> # Old API (batch ID, feature index, tokens)
        >>> message = get_clear_notification_message("batch-123", 5, 160000)
        >>> # New API (BatchState object)
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> message = get_clear_notification_message(state)
    """
    # Detect calling style by the type of the first argument.
    if isinstance(batch_id_or_state, str):
        # Old API: get_clear_notification_message(batch_id, feature_index, tokens)
        batch_id = batch_id_or_state
        current_index = feature_index if feature_index is not None else 0
        context_tokens = tokens_before_clear if tokens_before_clear is not None else 0
        # The old API carries no total, so assume a 10-feature batch for the
        # progress display. NOTE(review): this makes old-API percentages
        # approximate at best.
        total_features = 10
    else:
        # New API: get_clear_notification_message(state)
        state = batch_id_or_state
        batch_id = state.batch_id
        current_index = state.current_index
        context_tokens = state.context_token_estimate
        total_features = state.total_features
    # Calculate progress percentage, guarding against division by zero.
    progress_pct = int((current_index / total_features) * 100) if total_features > 0 else 0
    # Format token count with thousands separators (e.g., "155,000").
    tokens_formatted = f"{context_tokens:,}"
    message = f"""========================================
CONTEXT LIMIT REACHED
========================================
Current context: {tokens_formatted} tokens (threshold: {CONTEXT_THRESHOLD:,})
Progress: {current_index}/{total_features} features ({progress_pct}%)
Batch ID: {batch_id}
The batch has been paused to prevent context overflow.
NEXT STEPS:
1. Manually run: /clear
2. Resume batch: /batch-implement --resume {batch_id}
The batch will continue from feature {current_index + 1}/{total_features}.
All completed features are saved and will be skipped on resume.
========================================
"""
    return message
@deprecated
def pause_batch_for_clear(
    state_file: Path | str,
    feature_index_or_state: int | BatchState,
    tokens_before_clear: int,
) -> None:
    """Pause a batch so the user can clear context manually (DEPRECATED).

    DEPRECATED: Claude Code now manages its 200K-token context budget
    automatically, so manual pause/clear cycles are unnecessary. Retained
    only so older callers keep working.

    Marks the state as "paused", records the pause in auto_clear_events,
    bumps auto_clear_count, and persists the state to disk. After this
    call the user was expected to run /clear and then
    /batch-implement --resume <batch-id> (no longer needed).

    Args:
        state_file: Path to the batch state file.
        feature_index_or_state: Feature index (int, old API) or a
            BatchState object (new API) — backward compatible.
        tokens_before_clear: Estimated token count at pause time.

    Raises:
        BatchStateError: If persisting the state fails.

    Example:
        >>> # Old API (feature index)
        >>> pause_batch_for_clear(state_file, feature_index=2, tokens_before_clear=160000)
        >>> # New API (BatchState object)
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> pause_batch_for_clear(state_file, state, state.context_token_estimate)
    """
    # Backward-compatible dispatch: an int selects the old API and forces
    # a fresh load from disk; anything else is treated as a BatchState.
    if isinstance(feature_index_or_state, int):
        state = load_batch_state(state_file)
        # NOTE(review): the passed index is not used below — the recorded
        # event always reflects state.current_index, matching the new API.
    else:
        state = feature_index_or_state

    # Flag the batch as paused and remember where/why it stopped.
    state.status = "paused"
    state.context_tokens_before_clear = tokens_before_clear
    state.paused_at_feature_index = state.current_index

    # Append an audit-friendly pause record and bump the counter.
    state.auto_clear_events.append({
        "feature_index": state.current_index,
        "context_tokens_before_clear": tokens_before_clear,
        "timestamp": datetime.utcnow().isoformat() + "Z",
    })
    state.auto_clear_count += 1

    # Persist so a resume after /clear sees the paused status.
    save_batch_state(state_file, state)

    audit_log("batch_pause_for_clear", "success", {
        "batch_id": state.batch_id,
        "feature_index": state.current_index,
        "tokens_before": tokens_before_clear,
        "pause_count": state.auto_clear_count,
    })
def get_next_pending_feature(state: BatchState) -> Optional[str]:
    """Return the description of the next unprocessed feature.

    Args:
        state: Current batch state.

    Returns:
        The feature at state.current_index, or None once current_index
        has reached total_features (all features processed).

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> next_feature = get_next_pending_feature(state)
        >>> if next_feature:
        ...     # Process feature
        ...     pass
    """
    idx = state.current_index
    if idx < state.total_features:
        return state.features[idx]
    return None
# =============================================================================
# State Cleanup
# =============================================================================
def cleanup_batch_state(state_file: Path | str) -> None:
    """Remove batch state file safely.

    Validates the path first (CWE-22 traversal / CWE-59 symlink checks),
    then deletes the file under the module's per-file lock so a concurrent
    writer cannot race the removal. A missing file is treated as already
    cleaned up (success).

    Args:
        state_file: Path to state file

    Raises:
        BatchStateError: If path validation or file removal fails.
            The original exception is attached as __cause__ for debugging.

    Example:
        >>> from path_utils import get_batch_state_file
        >>> cleanup_batch_state(get_batch_state_file())
    """
    # Convert to Path
    state_file = Path(state_file)

    # Security: reject traversal/symlink tricks before touching the filesystem.
    try:
        state_file = validate_path(state_file, "batch state file", allow_missing=True)
    except ValueError as e:
        audit_log("batch_state_cleanup", "error", {
            "error": str(e),
            "path": str(state_file),
        })
        # Chain the cause so the original validation traceback is preserved.
        raise BatchStateError(str(e)) from e

    # Acquire file lock: never unlink a file a concurrent save is writing.
    lock = _get_file_lock(state_file)
    with lock:
        try:
            if state_file.exists():
                state_file.unlink()
            audit_log("batch_state_cleanup", "success", {
                "path": str(state_file),
            })
        except OSError as e:
            audit_log("batch_state_cleanup", "error", {
                "error": str(e),
                "path": str(state_file),
            })
            # Chain the cause so the OS-level error context is preserved.
            raise BatchStateError(f"Failed to cleanup batch state: {e}") from e
# =============================================================================
# Retry Count Tracking (Issue #89)
# =============================================================================
def get_retry_count(state: BatchState, feature_index: int) -> int:
    """Return how many times a feature has been retried.

    Args:
        state: Batch state
        feature_index: Index of the feature to look up

    Returns:
        The recorded retry count; 0 when the feature has never been retried.

    Examples:
        >>> state = load_batch_state(state_file)
        >>> retry_count = get_retry_count(state, 0)
        >>> print(f"Feature 0 has been retried {retry_count} times")
    """
    # Features absent from the map have simply never been retried.
    return state.retry_attempts.get(feature_index, 0)
def increment_retry_count(state_file: Path | str, feature_index: int) -> None:
    """Bump the retry counter for one feature by a single attempt.

    The read-modify-write cycle runs under the per-file lock, so
    concurrent callers cannot lose updates.

    Args:
        state_file: Path to batch state file
        feature_index: Index of feature to increment

    Examples:
        >>> increment_retry_count(state_file, 0)  # Increment retry count for feature 0
    """
    state_path = Path(state_file)

    with _get_file_lock(state_path):
        state = load_batch_state(state_path)

        # Missing entries count as zero prior attempts.
        state.retry_attempts[feature_index] = (
            state.retry_attempts.get(feature_index, 0) + 1
        )
        state.updated_at = datetime.utcnow().isoformat() + "Z"

        save_batch_state(state_path, state)

        audit_log("retry_count_incremented", "info", {
            "feature_index": feature_index,
            "new_count": state.retry_attempts[feature_index],
        })
def mark_feature_status(
state_file: Path | str,
feature_index: int,
status: str,
error_message: Optional[str] = None,
retry_count: Optional[int] = None,
) -> None:
"""
Mark feature status (completed or failed) with optional retry tracking.
Thread-safe update using file locking.
Args:
state_file: Path to batch state file
feature_index: Index of feature to mark
status: Status ("completed" or "failed")
error_message: Error message if failed
retry_count: Optional retry count to record
Examples:
>>> mark_feature_status(state_file, 0, "completed")
>>> mark_feature_status(state_file, 1, "failed", "SyntaxError", retry_count=2)
"""
state_path = Path(state_file)
with _get_file_lock(state_path):
# Load current state
state = load_batch_state(state_path)
if status == "completed":
if feature_index not in state.completed_features:
state.completed_features.append(feature_index)
# Remove from failed if it was there (retry succeeded)
state.failed_features = [
f for f in state.failed_features
if f.get("feature_index") != feature_index
]
elif status == "failed":
# Add to failed list if not already there
if not any(f.get("feature_index") == feature_index for f in state.failed_features):
failure_record = {
"feature_index": feature_index,
"error_message": error_message or "Unknown error",
"timestamp": datetime.utcnow().isoformat() + "Z",
}
if retry_count is not None:
failure_record["retry_count"] = retry_count
state.failed_features.append(failure_record)
# Update timestamp
state.updated_at = datetime.utcnow().isoformat() + "Z"
# Save updated state
save_batch_state(state_path, state)
# Audit log
audit_log("feature_status_updated", "info", {
"feature_index": feature_index,
"status": status,
"retry_count": retry_count,
})
# =============================================================================
# Git Operations Tracking (Issue #93)
# =============================================================================
def record_git_operation(
    state: BatchState,
    feature_index: int,
    operation: str,
    success: bool,
    commit_sha: Optional[str] = None,
    branch: Optional[str] = None,
    remote: Optional[str] = None,
    pr_number: Optional[int] = None,
    pr_url: Optional[str] = None,
    error_message: Optional[str] = None,
    **kwargs
) -> BatchState:
    """
    Record the outcome of a git operation for one feature.

    Mutates the passed state, refreshes its updated_at timestamp, and
    returns the same state so calls can be chained before saving.
    Used by the batch workflow to track commit/push/PR results per feature.

    Args:
        state: Current batch state
        feature_index: Index of feature being processed
        operation: Operation type ('commit', 'push', 'pr')
        success: Whether operation succeeded
        commit_sha: Commit SHA (for commit operations)
        branch: Branch name
        remote: Remote name (for push operations)
        pr_number: PR number (for pr operations)
        pr_url: PR URL (for pr operations)
        error_message: Error message (for failures)
        **kwargs: Additional metadata (never overrides the keys above)

    Returns:
        Updated batch state with git operation recorded

    Raises:
        ValueError: If operation is unknown or feature_index is out of range

    Examples:
        >>> state = load_batch_state(state_file)
        >>> state = record_git_operation(
        ...     state,
        ...     feature_index=0,
        ...     operation='commit',
        ...     success=True,
        ...     commit_sha='abc123',
        ...     branch='feature/test'
        ... )
        >>> save_batch_state(state_file, state)
    """
    # Reject unknown operations and out-of-range indices up front.
    valid_operations = ['commit', 'push', 'pr']
    if operation not in valid_operations:
        raise ValueError(f"Invalid operation: {operation}. Must be one of {valid_operations}")
    if not (0 <= feature_index < state.total_features):
        raise ValueError(f"Invalid feature_index: {feature_index} (total: {state.total_features})")

    # Base record; operation-specific fields are appended below in a
    # fixed order so the serialized layout stays stable.
    operation_record = {
        "success": success,
        "timestamp": datetime.utcnow().isoformat() + "Z",
    }
    if commit_sha:
        operation_record["sha"] = commit_sha
    if branch:
        operation_record["branch"] = branch
    if remote:
        operation_record["remote"] = remote
    if pr_number is not None:
        operation_record["number"] = pr_number
    if pr_url:
        operation_record["url"] = pr_url
    if error_message:
        operation_record["error"] = error_message

    # Merge extra metadata without clobbering the reserved keys above.
    for key, value in kwargs.items():
        operation_record.setdefault(key, value)

    # Store under the feature's per-operation map, creating it on first use.
    state.git_operations.setdefault(feature_index, {})[operation] = operation_record
    state.updated_at = datetime.utcnow().isoformat() + "Z"

    audit_log("git_operation_recorded", "info", {
        "batch_id": state.batch_id,
        "feature_index": feature_index,
        "operation": operation,
        "success": success,
    })

    return state
def get_feature_git_status(
    state: BatchState,
    feature_index: int
) -> Optional[Dict[str, Any]]:
    """
    Look up the recorded git operations for a feature.

    Args:
        state: Current batch state
        feature_index: Index of feature

    Returns:
        Mapping of operation name ('commit'/'push'/'pr') to its recorded
        result, or None when nothing has been recorded for the feature.

    Examples:
        >>> state = load_batch_state(state_file)
        >>> status = get_feature_git_status(state, 0)
        >>> if status:
        ...     commit = status.get('commit', {})
        ...     if commit.get('success'):
        ...         print(f"Commit: {commit['sha']}")
    """
    # .get returns None for features with no recorded operations.
    return state.git_operations.get(feature_index)
# =============================================================================
# BatchStateManager Class (Backward Compatibility Wrapper)
# =============================================================================
class BatchStateManager:
"""Object-oriented wrapper for batch state functions.
Provides backward compatibility for code expecting a class-based interface.
All methods delegate to the functional API defined above.
Examples:
>>> manager = BatchStateManager()
>>> state = manager.create_batch_state(["feature 1", "feature 2"])
>>> manager.save_batch_state(state)
>>> loaded = manager.load_batch_state()
"""
def __init__(self, state_file: Optional[Path] = None):
"""Initialize manager with optional custom state file path.
Args:
state_file: Optional custom path for state file.
If None, uses default (.claude/batch_state.json)
Path is validated for security (CWE-22, CWE-59)
Raises:
ValueError: If state_file contains path traversal or is outside project
"""
self.state_file = state_file if state_file else get_default_state_file()
# Validate path if provided (security requirement)
if state_file:
from security_utils import validate_path
self.state_file = validate_path(
Path(state_file),
"batch state file",
allow_missing=True
)
# Create parent directory if it doesn't exist
self.state_file.parent.mkdir(parents=True, exist_ok=True)
def create_batch_state(
self,
features: List[str],
batch_id: Optional[str] = None,
issue_numbers: Optional[List[int]] = None
) -> BatchState:
"""Create new batch state (delegates to create_batch_state function).
Args:
features: List of feature descriptions
batch_id: Optional custom batch ID
issue_numbers: Optional list of GitHub issue numbers
Returns:
BatchState object
"""
return create_batch_state(
features=features,
state_file=str(self.state_file),
batch_id=batch_id,
issue_numbers=issue_numbers
)
def create_batch(
self,
features: List[str],
features_file: Optional[str] = None,
batch_id: Optional[str] = None,
issue_numbers: Optional[List[int]] = None
) -> BatchState:
"""Create new batch state (alias for create_batch_state).
Args:
features: List of feature descriptions
features_file: Optional path to features file (for validation)
batch_id: Optional custom batch ID
issue_numbers: Optional list of GitHub issue numbers
Returns:
BatchState object
Note:
If features_file is provided, it is validated for security but not used
(features list is the actual source of truth)
"""
# Validate features_file if provided (security requirement)
if features_file:
from security_utils import validate_path
validate_path(Path(features_file), "features file", allow_missing=True)
return create_batch_state(
features=features,
state_file=str(self.state_file),
batch_id=batch_id,
issue_numbers=issue_numbers
)
def load_batch_state(self) -> BatchState:
"""Load batch state from file (delegates to load_batch_state function).
Returns:
BatchState object
"""
return load_batch_state(self.state_file)
def load_state(self) -> BatchState:
"""Alias for load_batch_state() for backward compatibility with tests.
Returns:
BatchState object
"""
return self.load_batch_state()
def save_batch_state(self, state: BatchState) -> None:
"""Save batch state to file (delegates to save_batch_state function).
Args:
state: BatchState object to save
"""
save_batch_state(self.state_file, state)
def save_state(self, state: BatchState) -> None:
"""Alias for save_batch_state() for backward compatibility with tests.
Args:
state: BatchState object to save
"""
self.save_batch_state(state)
def update_batch_progress(
self,
feature_index: int,
status: str,
tokens_consumed: int = 0
) -> None:
"""Update batch progress (delegates to update_batch_progress function).
Args:
feature_index: Index of completed feature
status: "completed" or "failed"
tokens_consumed: Estimated tokens consumed by this feature
"""
update_batch_progress(
self.state_file,
feature_index,
status,
tokens_consumed
)
def record_auto_clear_event(
self,
feature_index: int,
tokens_before_clear: int
) -> None:
"""Record auto-clear event (delegates to record_auto_clear_event function).
Args:
feature_index: Feature index when auto-clear triggered
tokens_before_clear: Estimated tokens before clearing
"""
record_auto_clear_event(
self.state_file,
feature_index,
tokens_before_clear
)
def should_auto_clear(self) -> bool:
"""Check if auto-clear should trigger (delegates to should_auto_clear function).
Returns:
True if context should be cleared
"""
state = self.load_batch_state()
return should_auto_clear(state)
def get_next_pending_feature(self) -> Optional[str]:
"""Get next pending feature (delegates to get_next_pending_feature function).
Returns:
Next feature description or None if all complete
"""
state = self.load_batch_state()
return get_next_pending_feature(state)
def cleanup_batch_state(self) -> None:
"""Cleanup batch state file (delegates to cleanup_batch_state function)."""
cleanup_batch_state(self.state_file)