TradingAgents/.claude/lib/batch_state_manager.py

1591 lines
55 KiB
Python

#!/usr/bin/env python3
"""
Batch State Manager - State-based tracking for /batch-implement command.
Manages persistent state for batch feature processing. Enables crash recovery,
resume functionality, and multi-feature batch processing.
DESIGN (v3.34.0): Compaction-resilient - all state is externalized (batch_state.json,
git commits, GitHub issues). Batches survive Claude Code's auto-compaction because
each feature bootstraps fresh from external state, not conversation memory.
NOTE: Context clearing functions (should_clear_context, pause_batch_for_clear,
get_clear_notification_message) are DEPRECATED. Kept for backward compatibility only.
Key Features:
1. Persistent state storage (.claude/batch_state.json)
2. Progress tracking (completed, failed, current feature)
3. Atomic writes with file locking
4. Security validations (CWE-22 path traversal, CWE-59 symlinks)
5. Crash recovery and resume
State Structure:
{
"batch_id": "batch-20251116-123456",
"features_file": "/path/to/features.txt",
"total_features": 10,
"features": ["feature 1", "feature 2", ...],
"current_index": 3,
"completed_features": [0, 1, 2],
"failed_features": [
{"feature_index": 5, "error_message": "Tests failed", "timestamp": "..."}
],
"context_token_estimate": 145000,
"auto_clear_count": 2,
"auto_clear_events": [
{"feature_index": 2, "tokens_before": 155000, "timestamp": "..."},
{"feature_index": 5, "tokens_before": 152000, "timestamp": "..."}
],
"created_at": "2025-11-16T10:00:00Z",
"updated_at": "2025-11-16T14:30:00Z",
"status": "in_progress" # in_progress, completed, failed
}
Workflow:
1. /batch-implement reads features.txt
2. create_batch_state() creates initial state
3. For each feature:
a. Process with /auto-implement
b. update_batch_progress() increments current_index
c. should_auto_clear() checks if threshold exceeded
d. If yes: record_auto_clear_event() → /clear → resume
4. cleanup_batch_state() removes state file on completion
Usage:
from batch_state_manager import (
create_batch_state,
load_batch_state,
save_batch_state,
update_batch_progress,
record_auto_clear_event,
should_auto_clear,
get_next_pending_feature,
cleanup_batch_state,
)
from path_utils import get_batch_state_file
# Create new batch
state = create_batch_state("/path/to/features.txt", ["feature 1", "feature 2"])
save_batch_state(get_batch_state_file(), state)
# Process features
while True:
next_feature = get_next_pending_feature(state)
if next_feature is None:
break
# Process feature...
# Update progress
update_batch_progress(get_batch_state_file(), state.current_index, "completed", 10000)
# Check auto-clear
state = load_batch_state(get_batch_state_file())
if should_auto_clear(state):
record_auto_clear_event(get_batch_state_file(), state.current_index, state.context_token_estimate)
# /clear command...
state = load_batch_state(get_batch_state_file())
# Cleanup
cleanup_batch_state(get_batch_state_file())
Date: 2025-11-16
Issue: #76 (State-based Auto-Clearing for /batch-implement)
Agent: implementer
Phase: TDD Green (making tests pass)
See error-handling-patterns skill for exception hierarchy and error handling best practices.
Design Patterns:
See library-design-patterns skill for standardized design patterns.
See state-management-patterns skill for standardized design patterns.
"""
import json
import os
import tempfile
import threading
import warnings
from functools import wraps
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional
# Import security utilities for path validation
import sys
sys.path.insert(0, str(Path(__file__).parent))
from security_utils import validate_path, audit_log
from path_utils import get_batch_state_file
# Import sanitization functions
try:
from failure_classifier import sanitize_feature_name
except ImportError:
# Fallback for tests
def sanitize_feature_name(name: str) -> str:
"""Fallback sanitization."""
return name.replace("\n", " ").replace("\r", " ")
# =============================================================================
# Decorators
# =============================================================================
def deprecated(func):
    """Decorator that flags *func* as deprecated.

    Every call to the wrapped callable emits a ``DeprecationWarning`` before
    delegating to the original implementation. Applied to the legacy
    context-clearing helpers that remain only for backward compatibility.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper that warns, then forwards all arguments to ``func``.
    """
    message = (
        f"{func.__name__} is deprecated but still functional. "
        "Hybrid pause/resume workflow still uses these functions."
    )

    @wraps(func)
    def _warn_and_call(*args, **kwargs):
        # stacklevel=2 points the warning at the caller, not this wrapper.
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return func(*args, **kwargs)

    return _warn_and_call
# =============================================================================
# Constants
# =============================================================================
# Default state file location (dynamically resolved from PROJECT_ROOT - Issue #79).
# This fixes the old hardcoded Path(".claude/batch_state.json"), which broke when
# the command was run from a subdirectory.
# WARNING: This evaluates once at module import time. For testing with mock
# project roots, use get_default_state_file() instead (evaluates lazily).
try:
    DEFAULT_STATE_FILE = get_batch_state_file()
except FileNotFoundError:
    # Fallback for edge cases (e.g., running outside a git repo, where the
    # project root cannot be resolved). The relative path preserves the
    # pre-Issue-#79 behavior for backward compatibility.
    DEFAULT_STATE_FILE = Path(".claude/batch_state.json")
def get_default_state_file():
    """Resolve the default batch-state file path lazily.

    Unlike the module-level ``DEFAULT_STATE_FILE`` constant (evaluated once at
    import time), this helper re-resolves the project root on every call,
    which is what test suites using mock project roots need. Production code
    can keep using the constant for speed.

    Returns:
        Path to PROJECT_ROOT/.claude/batch_state.json, or the relative
        ``.claude/batch_state.json`` fallback when no project root can be
        found (e.g. running outside a git repo).
    """
    try:
        resolved = get_batch_state_file()
    except FileNotFoundError:
        # No project root detected - fall back to the legacy relative path
        # for backward compatibility.
        resolved = Path(".claude/batch_state.json")
    return resolved
# Context token threshold (DEPRECATED - v3.34.0).
# No longer used for auto-clearing: the compaction-resilient design survives
# auto-compaction via externalized state. Kept only for the deprecated
# should_clear_context()/should_auto_clear() checks.
# NOTE(review): 150K presumably leaves headroom below Claude Code's 200K
# context budget - confirm rationale before changing.
CONTEXT_THRESHOLD = 150000
# File lock timeout (seconds).
# NOTE(review): not referenced anywhere in this module's visible code -
# presumably consumed by external callers; verify before removing.
LOCK_TIMEOUT = 30
# =============================================================================
# Exceptions
# =============================================================================
class BatchStateError(Exception):
    """Raised when a batch state operation (create/load/save/update) fails."""
# =============================================================================
# Data Classes
# =============================================================================
@dataclass
class BatchState:
    """Batch processing state, serialized to/from batch_state.json.

    Field order matters: positional construction and JSON round-tripping via
    ``BatchState(**data)`` both depend on it.

    Attributes:
        batch_id: Unique batch identifier
        features_file: Path to features file (metadata only)
        total_features: Total number of features in batch
        features: List of feature descriptions
        current_index: Index of current feature being processed
        completed_features: List of completed feature indices
        failed_features: List of failed feature records
        context_token_estimate: Estimated context token count
        auto_clear_count: Number of auto-clear events
        auto_clear_events: List of auto-clear event records
        created_at: ISO 8601 timestamp of batch creation
        updated_at: ISO 8601 timestamp of last update
        status: Batch status (in_progress/running, paused, completed, failed)
        issue_numbers: Optional list of GitHub issue numbers (for --issues flag)
        source_type: Source type ("file" or "issues")
        state_file: Path to state file
        context_tokens_before_clear: Token count before clear (paused batches, deprecated)
        paused_at_feature_index: Feature index where batch was paused (deprecated)
        retry_attempts: Dict mapping feature index to retry count (Issue #89)
        git_operations: Dict mapping feature index to git operation results (Issue #93)
            Structure: {feature_index: {operation_type: {success, sha, branch, ...}}}
            Example: {0: {"commit": {"success": True, "sha": "abc123", "branch": "feature/test"}}}
        feature_order: Optimized execution order of feature indices (Issue #157)
        feature_dependencies: Dependency graph mapping feature index to
            prerequisite indices (Issue #157)
        analysis_metadata: Analysis info such as stats and timing (Issue #157)
        workflow_mode: "auto-implement" or "direct" - tells Claude HOW to
            process features (compaction-resilience)
        workflow_reminder: Methodology reminder re-injected after compaction
    """
    batch_id: str
    features_file: str
    total_features: int
    features: List[str]
    current_index: int = 0
    completed_features: List[int] = field(default_factory=list)
    failed_features: List[Dict[str, Any]] = field(default_factory=list)
    context_token_estimate: int = 0
    auto_clear_count: int = 0
    auto_clear_events: List[Dict[str, Any]] = field(default_factory=list)
    created_at: str = ""
    updated_at: str = ""
    status: str = "in_progress"
    issue_numbers: Optional[List[int]] = None
    source_type: str = "file"
    state_file: str = ""
    context_tokens_before_clear: Optional[int] = None
    paused_at_feature_index: Optional[int] = None
    retry_attempts: Dict[int, int] = field(default_factory=dict)  # Issue #89: Track retry counts per feature
    git_operations: Dict[int, Dict[str, Any]] = field(default_factory=dict)  # Issue #93: Track git operations per feature
    feature_order: List[int] = field(default_factory=list)  # Issue #157: Optimized execution order
    feature_dependencies: Dict[int, List[int]] = field(default_factory=dict)  # Issue #157: Dependency graph
    analysis_metadata: Dict[str, Any] = field(default_factory=dict)  # Issue #157: Analysis info (stats, timing, etc.)
    # Compaction-resilience: Workflow methodology survives context summarization
    workflow_mode: str = "auto-implement"  # "auto-implement" or "direct" - tells Claude HOW to process features
    workflow_reminder: str = "Use /auto-implement for each feature. Do NOT implement directly."  # Reinjects methodology after compaction

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization (recursive via asdict)."""
        return asdict(self)
# Per-file reentrant lock registry: one RLock per resolved state-file path,
# keyed by the absolute path string. _locks_lock guards registry insertion so
# concurrent threads never create two locks for the same file.
_file_locks: Dict[str, threading.RLock] = {}
_locks_lock = threading.Lock()
def audit_log_security_event(event_type: str, details: Dict[str, Any]) -> None:
    """Record a security-related event in the audit log.

    Thin convenience wrapper around ``security_utils.audit_log`` that pins the
    middle (status) argument to ``"security"``.

    Args:
        event_type: Category of the security event being recorded.
        details: Structured payload describing the event.
    """
    audit_log(event_type, "security", details)
def _get_file_lock(file_path: Path) -> threading.RLock:
    """Return the per-file reentrant lock, creating it on first use.

    Locks are keyed by the fully resolved path so different spellings of the
    same file share one lock. The lock is reentrant, which lets a thread that
    already holds it call helpers (load/save) that acquire it again.

    Args:
        file_path: Path whose lock should be fetched.

    Returns:
        The ``threading.RLock`` associated with *file_path*.
    """
    key = str(file_path.resolve())
    with _locks_lock:
        # setdefault inserts a fresh RLock only when the key is absent.
        return _file_locks.setdefault(key, threading.RLock())
# =============================================================================
# State Creation
# =============================================================================
def create_batch_state(
    features_file_or_features: Optional[str | List[str]] = None,
    features_or_none: Optional[List[str]] = None,
    issue_numbers: Optional[List[int]] = None,
    source_type: str = "file",
    state_file: Optional[str] = None,
    *,
    features: Optional[List[str]] = None,  # Keyword-only for new calling style
    features_file: Optional[str] = None,  # Keyword-only for explicit features_file
    batch_id: Optional[str] = None,  # Optional custom batch ID
) -> BatchState:
    """Create new batch state.

    Supports two calling styles for backward compatibility:
    1. Old style (positional): create_batch_state(features_file, features)
    2. New style (keyword): create_batch_state(features=..., state_file=..., issue_numbers=...)

    Args:
        features_file_or_features: Features file path (old style) OR features list (new style detection)
        features_or_none: Features list (old style) or None (new style)
        issue_numbers: Optional list of GitHub issue numbers (for --issues flag)
        source_type: Source type ("file" or "issues")
        state_file: Optional path to state file
        features: Features list (keyword-only, for new calling style)
        features_file: Explicit features file path (keyword-only, new style)
        batch_id: Optional custom batch ID (keyword-only)

    Returns:
        Newly created BatchState with status "in_progress" and no progress yet.

    Raises:
        BatchStateError: If the features list is empty, the arguments are
            ambiguous, or features_file/batch_id contain path traversal.

    Examples:
        Old style (backward compatible):
            >>> state = create_batch_state("/path/to/features.txt", ["feature 1", "feature 2"])
            >>> state.source_type
            'file'
        New style (--issues flag):
            >>> state = create_batch_state(
            ...     features=["Issue #72: Add logging"],
            ...     issue_numbers=[72],
            ...     source_type="issues",
            ...     state_file="/path/to/state.json"
            ... )
            >>> state.issue_numbers
            [72]
    """
    # --- Detect calling style ---
    if features is not None:
        # New style: features passed as a keyword argument.
        features_list = features
        # Use explicit features_file keyword if provided, otherwise empty.
        features_file_path = features_file if features_file is not None else ""
    elif features_file_or_features is None and features_or_none is None:
        # Neither positional argument provided - must use keyword 'features'.
        raise BatchStateError(
            "Invalid arguments. Use either:\n"
            " create_batch_state(features_file, features) # Old style\n"
            " create_batch_state(features=..., state_file=..., issue_numbers=...) # New style"
        )
    elif isinstance(features_file_or_features, list):
        # First positional arg is a list: treat as new style without keyword.
        if features_or_none is None:
            features_list = features_file_or_features
            features_file_path = ""
        else:
            # Very unlikely case: both positionals are lists.
            raise BatchStateError("Ambiguous arguments: both features_file and features appear to be lists")
    elif isinstance(features_file_or_features, str) and features_or_none is not None:
        # Old style: create_batch_state(features_file, features)
        features_file_path = features_file_or_features
        features_list = features_or_none
    else:
        raise BatchStateError(
            "Invalid arguments. Use either:\n"
            " create_batch_state(features_file, features) # Old style\n"
            " create_batch_state(features=..., state_file=..., issue_numbers=...) # New style"
        )
    if not features_list:
        raise BatchStateError("Cannot create batch state with no features")
    # Sanitize feature names (CWE-117 log injection, CWE-22 path traversal).
    sanitized_features = [sanitize_feature_name(f) for f in features_list]
    # Validate features_file path (security). The ".." check alone covers all
    # traversal forms; the former startswith("/tmp/../../") clause was dead
    # code because any such path already contains "..". Note: features_file is
    # just metadata and is never opened here.
    if features_file_path and ".." in features_file_path:
        raise BatchStateError("Invalid features file path: path traversal detected")
    # Validate batch_id for path traversal (CWE-22): it must be a simple
    # identifier with no "..", "/" or "\" components.
    if batch_id and (".." in batch_id or "/" in batch_id or "\\" in batch_id):
        raise BatchStateError(
            "Invalid batch_id: contains path traversal or directory separators. "
            "batch_id must be a simple identifier without path components."
        )
    # Generate a unique batch ID (microsecond precision avoids collisions)
    # unless the caller supplied one.
    if not batch_id:
        timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S-%f")
        batch_id = f"batch-{timestamp}"
    # Create timestamps (naive UTC + "Z" suffix, matching the rest of the module).
    now = datetime.utcnow().isoformat() + "Z"
    return BatchState(
        batch_id=batch_id,
        features_file=features_file_path,
        total_features=len(sanitized_features),
        features=sanitized_features,
        current_index=0,
        completed_features=[],
        failed_features=[],
        context_token_estimate=0,
        auto_clear_count=0,
        auto_clear_events=[],
        created_at=now,
        updated_at=now,
        status="in_progress",
        issue_numbers=issue_numbers,
        source_type=source_type,
        state_file=state_file or "",
        context_tokens_before_clear=None,
        paused_at_feature_index=None,
    )
# =============================================================================
# State Persistence
# =============================================================================
def save_batch_state(state_file: Path | str, state: BatchState) -> None:
    """Save batch state to JSON file (atomic write).

    Uses the temp-file + rename pattern so readers never observe a partially
    written file, and sets permissions to 0o600 (owner read/write only).
    Refreshes ``state.updated_at`` as a side effect.

    Args:
        state_file: Path to state file (relative paths resolve from PROJECT_ROOT)
        state: Batch state to save

    Raises:
        BatchStateError: If path validation fails or the write fails

    Security:
        - Validates path with security_utils.validate_path()
        - Rejects symlinks (CWE-59), prevents path traversal (CWE-22)
        - Atomic write (temp file + rename), file permissions 0o600
        - Audit logging of success and failure

    Atomic Write Design:
        1. CREATE: tempfile.mkstemp() creates a .tmp file in the same directory
        2. WRITE: JSON data written to the .tmp file
        3. RENAME: temp_path.replace(target) atomically swaps the file in

    Failure Scenarios:
        - Crash during write: temp file left behind, target unchanged
        - Crash during rename: atomic, so target is old or new (never partial)
        - Concurrent writes: each gets a unique temp file (last write wins)

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = create_batch_state("/path/to/features.txt", ["feature 1"])
        >>> save_batch_state(get_batch_state_file(), state)
    """
    state_file = Path(state_file)
    # Resolve relative paths from PROJECT_ROOT (Issue #79) so that
    # "custom/state.json" becomes PROJECT_ROOT/custom/state.json.
    if not state_file.is_absolute():
        from path_utils import get_project_root
        try:
            project_root = get_project_root(use_cache=False)
            state_file = project_root / state_file
        except FileNotFoundError:
            # No project root: keep the cwd-relative path (backward compatibility).
            pass
    # Validate path (security).
    try:
        state_file = validate_path(state_file, "batch state file", allow_missing=True)
    except ValueError as e:
        audit_log("batch_state_save", "error", {
            "error": str(e),
            "path": str(state_file),
        })
        raise BatchStateError(str(e))
    # Refresh timestamp so the on-disk copy records the time of the last write.
    state.updated_at = datetime.utcnow().isoformat() + "Z"
    # Serialize writers to this file (reentrant per-file lock).
    lock = _get_file_lock(state_file)
    with lock:
        try:
            # Ensure parent directory exists.
            state_file.parent.mkdir(parents=True, exist_ok=True)
            # Atomic write: temp file in the same directory, then rename.
            temp_fd, temp_path_str = tempfile.mkstemp(
                dir=state_file.parent,
                prefix=".batch_state_",
                suffix=".tmp"
            )
            temp_path = Path(temp_path_str)
            try:
                # Write JSON to the temp file.
                json_data = json.dumps(state.to_dict(), indent=2)
                os.write(temp_fd, json_data.encode('utf-8'))
                os.close(temp_fd)
                # Owner read/write only, set before the file becomes visible.
                temp_path.chmod(0o600)
                # Atomic rename into place.
                temp_path.replace(state_file)
                audit_log("batch_state_save", "success", {
                    "batch_id": state.batch_id,
                    "path": str(state_file),
                    "features_count": state.total_features,
                })
            except Exception:
                # Best-effort cleanup of the temp file; the narrow OSError
                # excepts replace former bare 'except:' clauses so signals
                # like KeyboardInterrupt are never swallowed here.
                try:
                    os.close(temp_fd)
                except OSError:
                    pass  # fd already closed on the success path above
                try:
                    temp_path.unlink()
                except OSError:
                    pass  # temp file already renamed away or never created
                raise
        except OSError as e:
            audit_log("batch_state_save", "error", {
                "error": str(e),
                "path": str(state_file),
            })
            # Provide more specific error messages.
            error_msg = str(e).lower()
            if "space" in error_msg or "disk full" in error_msg:
                raise BatchStateError(f"Disk space error while saving batch state: {e}")
            elif "permission" in error_msg:
                raise BatchStateError(f"Permission error while saving batch state: {e}")
            else:
                raise BatchStateError(f"Failed to save batch state: {e}")
def load_batch_state(state_file: Path | str) -> BatchState:
    """Load batch state from JSON file.

    Args:
        state_file: Path to state file (relative paths resolve from PROJECT_ROOT)

    Returns:
        Loaded BatchState, with defaults filled in for any fields added after
        the file was written (backward compatibility with older state files).

    Raises:
        BatchStateError: If path validation fails, the file is missing or
            corrupted, or required fields are absent

    Security:
        - Validates path with security_utils.validate_path()
        - Rejects symlinks (CWE-59), prevents path traversal (CWE-22)
        - Graceful degradation on corrupted JSON
        - Audit logging

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> state.batch_id
        'batch-20251116-123456'
    """
    state_file = Path(state_file)
    # Resolve relative paths from PROJECT_ROOT (Issue #79) so that
    # "custom/state.json" becomes PROJECT_ROOT/custom/state.json.
    if not state_file.is_absolute():
        from path_utils import get_project_root
        try:
            project_root = get_project_root(use_cache=False)
            state_file = project_root / state_file
        except FileNotFoundError:
            # No project root: keep the cwd-relative path (backward compatibility).
            pass
    # Validate path (security).
    try:
        state_file = validate_path(state_file, "batch state file", allow_missing=False)
    except ValueError as e:
        audit_log("batch_state_load", "error", {
            "error": str(e),
            "path": str(state_file),
        })
        raise BatchStateError(str(e))
    if not state_file.exists():
        raise BatchStateError(f"Batch state file not found: {state_file}")
    # Serialize against concurrent writers (reentrant per-file lock).
    lock = _get_file_lock(state_file)
    with lock:
        try:
            # Read as UTF-8 explicitly: save_batch_state() writes UTF-8 bytes,
            # so relying on the platform default encoding (e.g. cp1252 on
            # Windows) could corrupt non-ASCII feature names.
            with open(state_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Validate required fields.
            required_fields = [
                "batch_id", "features_file", "total_features", "features",
                "current_index", "status"
            ]
            # Loop variable renamed from 'field' to avoid shadowing dataclasses.field.
            missing_fields = [name for name in required_fields if name not in data]
            if missing_fields:
                raise BatchStateError(f"Missing required fields: {missing_fields}")
            # Backward compatibility: default values for newer fields.
            # Old state files (pre-v3.23.0) lack issue_numbers/source_type/state_file (Issue #77).
            if 'issue_numbers' not in data:
                data['issue_numbers'] = None
            if 'source_type' not in data:
                data['source_type'] = 'file'
            if 'state_file' not in data:
                data['state_file'] = str(state_file)
            # Issue #88: deprecated pause fields.
            if 'context_tokens_before_clear' not in data:
                data['context_tokens_before_clear'] = None
            if 'paused_at_feature_index' not in data:
                data['paused_at_feature_index'] = None
            # Issue #89: retry tracking. JSON stringifies int keys; restore them.
            if 'retry_attempts' not in data:
                data['retry_attempts'] = {}
            else:
                data['retry_attempts'] = {int(k): v for k, v in data['retry_attempts'].items()}
            # Issue #93: git operations tracking, same int-key restoration.
            if 'git_operations' not in data:
                data['git_operations'] = {}
            else:
                data['git_operations'] = {int(k): v for k, v in data['git_operations'].items()}
            # Compaction-resilience: workflow_mode and workflow_reminder defaults.
            if 'workflow_mode' not in data:
                data['workflow_mode'] = 'auto-implement'
            if 'workflow_reminder' not in data:
                data['workflow_reminder'] = 'Use /auto-implement for each feature. Do NOT implement directly.'
            # Both 'running' and 'in_progress' are accepted as active states.
            state = BatchState(**data)
            audit_log("batch_state_load", "success", {
                "batch_id": state.batch_id,
                "path": str(state_file),
            })
            return state
        except json.JSONDecodeError as e:
            audit_log("batch_state_load", "error", {
                "error": f"Corrupted JSON: {e}",
                "path": str(state_file),
            })
            raise BatchStateError(f"Corrupted batch state file: {e}")
        except OSError as e:
            audit_log("batch_state_load", "error", {
                "error": str(e),
                "path": str(state_file),
            })
            # Provide more specific error messages.
            error_msg = str(e).lower()
            if "permission" in error_msg:
                raise BatchStateError(f"Permission error while loading batch state: {e}")
            else:
                raise BatchStateError(f"Failed to load batch state: {e}")
# =============================================================================
# State Updates
# =============================================================================
def update_batch_progress(
    state_file: Path | str,
    feature_index: int,
    status: str,
    context_token_delta: int = 0,
    error_message: Optional[str] = None,
    token_delta: Optional[int] = None,  # Backward compatibility alias
) -> None:
    """Update batch progress after processing a feature.

    This function is thread-safe - it uses a reentrant per-file lock to
    serialize the whole load-modify-save sequence, so multiple threads can
    call it simultaneously with different feature indices.

    Args:
        state_file: Path to state file
        feature_index: Index of processed feature
        status: Feature status ("completed" or "failed")
        context_token_delta: Tokens added during feature processing
        error_message: Error message if status is "failed"
        token_delta: Alias for context_token_delta (backward compatibility)

    Raises:
        BatchStateError: If feature_index is out of range, or loading/saving
            the state fails
        ValueError: If status is not "completed" or "failed"

    Example:
        >>> from path_utils import get_batch_state_file
        >>> update_batch_progress(
        ...     state_file=get_batch_state_file(),
        ...     feature_index=0,
        ...     status="completed",
        ...     context_token_delta=5000,
        ... )
    """
    # Backward compatibility: support both parameter names (token_delta wins).
    if token_delta is not None:
        context_token_delta = token_delta
    state_file_path = Path(state_file)
    # Acquire the file lock for an atomic read-modify-write. The lock is
    # reentrant (RLock), so the nested load_batch_state/save_batch_state
    # calls can safely re-acquire it.
    lock = _get_file_lock(state_file_path)
    with lock:
        state = load_batch_state(state_file)
        # Reject out-of-range indices before mutating anything.
        if feature_index < 0 or feature_index >= state.total_features:
            raise BatchStateError(f"Invalid feature index: {feature_index} (total: {state.total_features})")
        if status == "completed":
            # Idempotent: re-recording a completed feature is a no-op.
            if feature_index not in state.completed_features:
                state.completed_features.append(feature_index)
        elif status == "failed":
            failure_record = {
                "feature_index": feature_index,
                "error_message": error_message or "Unknown error",
                "timestamp": datetime.utcnow().isoformat() + "Z",
            }
            state.failed_features.append(failure_record)
        else:
            raise ValueError(f"Invalid status: {status} (must be 'completed' or 'failed')")
        # Running total of estimated context growth.
        state.context_token_estimate += context_token_delta
        # Advance current_index monotonically (max guards against
        # out-of-order concurrent updates).
        state.current_index = max(state.current_index, feature_index + 1)
        # NOTE(review): the batch is marked "completed" once every feature has
        # been processed, even if some features failed - confirm this is the
        # intended terminal state for partially failed batches.
        if state.current_index >= state.total_features:
            state.status = "completed"
        save_batch_state(state_file, state)
def record_auto_clear_event(
    state_file: Path | str,
    feature_index: int,
    context_tokens_before_clear: int,
) -> None:
    """Record auto-clear event in batch state.

    Appends an event record, bumps the auto-clear counter, and resets the
    context token estimate to 0 (the context was just cleared).

    Args:
        state_file: Path to state file
        feature_index: Index of feature that triggered auto-clear
        context_tokens_before_clear: Token count before /clear

    Raises:
        BatchStateError: If loading or saving the state fails

    Example:
        >>> from path_utils import get_batch_state_file
        >>> record_auto_clear_event(
        ...     state_file=get_batch_state_file(),
        ...     feature_index=2,
        ...     context_tokens_before_clear=155000,
        ... )
    """
    # Hold the per-file lock across the whole read-modify-write, matching
    # update_batch_progress(); without it a concurrent updater could
    # interleave between load and save and lose this event. The lock is
    # reentrant, so the nested load/save calls re-acquire it safely.
    lock = _get_file_lock(Path(state_file))
    with lock:
        state = load_batch_state(state_file)
        # Create the auto-clear event record.
        event = {
            "feature_index": feature_index,
            "context_tokens_before_clear": context_tokens_before_clear,
            "timestamp": datetime.utcnow().isoformat() + "Z",
        }
        state.auto_clear_events.append(event)
        state.auto_clear_count += 1
        # Context was just cleared, so the running estimate starts over.
        state.context_token_estimate = 0
        save_batch_state(state_file, state)
    # Audit log (outside the lock; only reads local state).
    audit_log("batch_auto_clear", "success", {
        "batch_id": state.batch_id,
        "feature_index": feature_index,
        "tokens_before": context_tokens_before_clear,
        "clear_count": state.auto_clear_count,
    })
# =============================================================================
# State Queries
# =============================================================================
def should_auto_clear(state: BatchState) -> bool:
    """Report whether the batch's estimated context size warrants a /clear.

    Args:
        state: Current batch state.

    Returns:
        True once ``context_token_estimate`` has reached ``CONTEXT_THRESHOLD``.

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> if should_auto_clear(state):
        ...     pass  # Trigger /clear
    """
    estimated = state.context_token_estimate
    return estimated >= CONTEXT_THRESHOLD
@deprecated
def should_clear_context(state: BatchState) -> bool:
    """Legacy check for whether a manual context clear is due (DEPRECATED).

    Claude Code now manages its 200K-token context automatically, so manual
    clearing is unnecessary; this remains only so existing callers keep
    working. Calling it emits a DeprecationWarning via @deprecated.

    Args:
        state: Current batch state.

    Returns:
        True when the token estimate has reached the 150K threshold (no
        action is actually required anymore).

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> if should_clear_context(state):  # Will emit DeprecationWarning
        ...     pass  # No action needed - Claude Code handles context
    """
    tokens_used = state.context_token_estimate
    return tokens_used >= CONTEXT_THRESHOLD
def estimate_context_tokens(text: str) -> int:
    """Roughly estimate the token count of *text*.

    Uses the conservative heuristic of one token per four characters, so the
    estimate errs toward triggering context management early rather than
    after the real limit is hit.

    Args:
        text: Text whose token footprint should be estimated.

    Returns:
        ``len(text) // 4``, or 0 for empty/falsy input.

    Example:
        >>> estimate_context_tokens("Hello world! " * 100)
        325
    """
    # Empty (or otherwise falsy) input contributes no tokens.
    return len(text) // 4 if text else 0
@deprecated
def get_clear_notification_message(
    batch_id_or_state: str | BatchState,
    feature_index: Optional[int] = None,
    tokens_before_clear: Optional[int] = None,
) -> str:
    """Format user notification message for context clearing (DEPRECATED).

    DEPRECATED: Claude Code manages context automatically with its 200K token
    budget, so no manual clearing is needed. Kept for backward compatibility.

    Builds a multi-line banner instructing the user to run /clear and then
    resume with /batch-implement --resume <batch-id>.

    Args:
        batch_id_or_state: Batch ID (str, old API) or BatchState object (new API)
        feature_index: Current feature index (old API only)
        tokens_before_clear: Token count before clear (old API only)

    Returns:
        Formatted notification message (multi-line, readable)

    Example:
        >>> # Old API (batch ID, feature index, tokens)
        >>> message = get_clear_notification_message("batch-123", 5, 160000)
        >>> # New API (BatchState object)
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> message = get_clear_notification_message(state)
    """
    # Detect calling style by the type of the first argument.
    if isinstance(batch_id_or_state, str):
        # Old API: get_clear_notification_message(batch_id, feature_index, tokens)
        batch_id = batch_id_or_state
        current_index = feature_index if feature_index is not None else 0
        context_tokens = tokens_before_clear if tokens_before_clear is not None else 0
        # The old API carries no total, so assume a 10-feature batch for the
        # progress display. NOTE(review): this makes old-API percentages
        # approximate at best.
        total_features = 10
    else:
        # New API: get_clear_notification_message(state)
        state = batch_id_or_state
        batch_id = state.batch_id
        current_index = state.current_index
        context_tokens = state.context_token_estimate
        total_features = state.total_features
    # Calculate progress percentage, guarding against division by zero.
    progress_pct = int((current_index / total_features) * 100) if total_features > 0 else 0
    # Format token count with thousands separators (e.g., "155,000").
    tokens_formatted = f"{context_tokens:,}"
    message = f"""========================================
CONTEXT LIMIT REACHED
========================================
Current context: {tokens_formatted} tokens (threshold: {CONTEXT_THRESHOLD:,})
Progress: {current_index}/{total_features} features ({progress_pct}%)
Batch ID: {batch_id}
The batch has been paused to prevent context overflow.
NEXT STEPS:
1. Manually run: /clear
2. Resume batch: /batch-implement --resume {batch_id}
The batch will continue from feature {current_index + 1}/{total_features}.
All completed features are saved and will be skipped on resume.
========================================
"""
    return message
@deprecated
def pause_batch_for_clear(
    state_file: Path | str,
    feature_index_or_state: int | BatchState,
    tokens_before_clear: int,
) -> None:
    """Pause a batch so the user can clear context manually (DEPRECATED).

    DEPRECATED: Claude Code now manages its 200K-token context budget
    automatically, so manual pause/clear cycles are unnecessary. Retained
    only so older callers keep working.

    Marks the state as "paused", records the pause in auto_clear_events,
    bumps auto_clear_count, and persists the state to disk. After this
    call the user was expected to run /clear and then
    /batch-implement --resume <batch-id> (no longer needed).

    Args:
        state_file: Path to the batch state file.
        feature_index_or_state: Feature index (int, old API) or a
            BatchState object (new API) — backward compatible.
        tokens_before_clear: Estimated token count at pause time.

    Raises:
        BatchStateError: If persisting the state fails.

    Example:
        >>> # Old API (feature index)
        >>> pause_batch_for_clear(state_file, feature_index=2, tokens_before_clear=160000)
        >>> # New API (BatchState object)
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> pause_batch_for_clear(state_file, state, state.context_token_estimate)
    """
    # Backward-compatible dispatch: an int selects the old API and forces
    # a fresh load from disk; anything else is treated as a BatchState.
    if isinstance(feature_index_or_state, int):
        state = load_batch_state(state_file)
        # NOTE(review): the passed index is not used below — the recorded
        # event always reflects state.current_index, matching the new API.
    else:
        state = feature_index_or_state

    # Flag the batch as paused and remember where/why it stopped.
    state.status = "paused"
    state.context_tokens_before_clear = tokens_before_clear
    state.paused_at_feature_index = state.current_index

    # Append an audit-friendly pause record and bump the counter.
    state.auto_clear_events.append({
        "feature_index": state.current_index,
        "context_tokens_before_clear": tokens_before_clear,
        "timestamp": datetime.utcnow().isoformat() + "Z",
    })
    state.auto_clear_count += 1

    # Persist so a resume after /clear sees the paused status.
    save_batch_state(state_file, state)

    audit_log("batch_pause_for_clear", "success", {
        "batch_id": state.batch_id,
        "feature_index": state.current_index,
        "tokens_before": tokens_before_clear,
        "pause_count": state.auto_clear_count,
    })
def get_next_pending_feature(state: BatchState) -> Optional[str]:
    """Return the description of the next unprocessed feature.

    Args:
        state: Current batch state.

    Returns:
        The feature at state.current_index, or None once current_index
        has reached total_features (all features processed).

    Example:
        >>> from path_utils import get_batch_state_file
        >>> state = load_batch_state(get_batch_state_file())
        >>> next_feature = get_next_pending_feature(state)
        >>> if next_feature:
        ...     # Process feature
        ...     pass
    """
    idx = state.current_index
    if idx < state.total_features:
        return state.features[idx]
    return None
# =============================================================================
# State Cleanup
# =============================================================================
def cleanup_batch_state(state_file: Path | str) -> None:
    """Remove batch state file safely.

    Validates the path first (CWE-22 traversal / CWE-59 symlink checks),
    then deletes the file under the module's per-file lock so a concurrent
    writer cannot race the removal. A missing file is treated as already
    cleaned up (success).

    Args:
        state_file: Path to state file

    Raises:
        BatchStateError: If path validation or file removal fails.
            The original exception is attached as __cause__ for debugging.

    Example:
        >>> from path_utils import get_batch_state_file
        >>> cleanup_batch_state(get_batch_state_file())
    """
    # Convert to Path
    state_file = Path(state_file)

    # Security: reject traversal/symlink tricks before touching the filesystem.
    try:
        state_file = validate_path(state_file, "batch state file", allow_missing=True)
    except ValueError as e:
        audit_log("batch_state_cleanup", "error", {
            "error": str(e),
            "path": str(state_file),
        })
        # Chain the cause so the original validation traceback is preserved.
        raise BatchStateError(str(e)) from e

    # Acquire file lock: never unlink a file a concurrent save is writing.
    lock = _get_file_lock(state_file)
    with lock:
        try:
            if state_file.exists():
                state_file.unlink()
            audit_log("batch_state_cleanup", "success", {
                "path": str(state_file),
            })
        except OSError as e:
            audit_log("batch_state_cleanup", "error", {
                "error": str(e),
                "path": str(state_file),
            })
            # Chain the cause so the OS-level error context is preserved.
            raise BatchStateError(f"Failed to cleanup batch state: {e}") from e
# =============================================================================
# Retry Count Tracking (Issue #89)
# =============================================================================
def get_retry_count(state: BatchState, feature_index: int) -> int:
    """Return how many times a feature has been retried.

    Args:
        state: Batch state
        feature_index: Index of the feature to look up

    Returns:
        The recorded retry count; 0 when the feature has never been retried.

    Examples:
        >>> state = load_batch_state(state_file)
        >>> retry_count = get_retry_count(state, 0)
        >>> print(f"Feature 0 has been retried {retry_count} times")
    """
    # Features absent from the map have simply never been retried.
    return state.retry_attempts.get(feature_index, 0)
def increment_retry_count(state_file: Path | str, feature_index: int) -> None:
    """Bump the retry counter for one feature by a single attempt.

    The read-modify-write cycle runs under the per-file lock, so
    concurrent callers cannot lose updates.

    Args:
        state_file: Path to batch state file
        feature_index: Index of feature to increment

    Examples:
        >>> increment_retry_count(state_file, 0)  # Increment retry count for feature 0
    """
    state_path = Path(state_file)

    with _get_file_lock(state_path):
        state = load_batch_state(state_path)

        # Missing entries count as zero prior attempts.
        state.retry_attempts[feature_index] = (
            state.retry_attempts.get(feature_index, 0) + 1
        )
        state.updated_at = datetime.utcnow().isoformat() + "Z"

        save_batch_state(state_path, state)

        audit_log("retry_count_incremented", "info", {
            "feature_index": feature_index,
            "new_count": state.retry_attempts[feature_index],
        })
def mark_feature_status(
state_file: Path | str,
feature_index: int,
status: str,
error_message: Optional[str] = None,
retry_count: Optional[int] = None,
) -> None:
"""
Mark feature status (completed or failed) with optional retry tracking.
Thread-safe update using file locking.
Args:
state_file: Path to batch state file
feature_index: Index of feature to mark
status: Status ("completed" or "failed")
error_message: Error message if failed
retry_count: Optional retry count to record
Examples:
>>> mark_feature_status(state_file, 0, "completed")
>>> mark_feature_status(state_file, 1, "failed", "SyntaxError", retry_count=2)
"""
state_path = Path(state_file)
with _get_file_lock(state_path):
# Load current state
state = load_batch_state(state_path)
if status == "completed":
if feature_index not in state.completed_features:
state.completed_features.append(feature_index)
# Remove from failed if it was there (retry succeeded)
state.failed_features = [
f for f in state.failed_features
if f.get("feature_index") != feature_index
]
elif status == "failed":
# Add to failed list if not already there
if not any(f.get("feature_index") == feature_index for f in state.failed_features):
failure_record = {
"feature_index": feature_index,
"error_message": error_message or "Unknown error",
"timestamp": datetime.utcnow().isoformat() + "Z",
}
if retry_count is not None:
failure_record["retry_count"] = retry_count
state.failed_features.append(failure_record)
# Update timestamp
state.updated_at = datetime.utcnow().isoformat() + "Z"
# Save updated state
save_batch_state(state_path, state)
# Audit log
audit_log("feature_status_updated", "info", {
"feature_index": feature_index,
"status": status,
"retry_count": retry_count,
})
# =============================================================================
# Git Operations Tracking (Issue #93)
# =============================================================================
def record_git_operation(
    state: BatchState,
    feature_index: int,
    operation: str,
    success: bool,
    commit_sha: Optional[str] = None,
    branch: Optional[str] = None,
    remote: Optional[str] = None,
    pr_number: Optional[int] = None,
    pr_url: Optional[str] = None,
    error_message: Optional[str] = None,
    **kwargs
) -> BatchState:
    """
    Record the outcome of a git operation for one feature.

    Mutates the passed state, refreshes its updated_at timestamp, and
    returns the same state so calls can be chained before saving.
    Used by the batch workflow to track commit/push/PR results per feature.

    Args:
        state: Current batch state
        feature_index: Index of feature being processed
        operation: Operation type ('commit', 'push', 'pr')
        success: Whether operation succeeded
        commit_sha: Commit SHA (for commit operations)
        branch: Branch name
        remote: Remote name (for push operations)
        pr_number: PR number (for pr operations)
        pr_url: PR URL (for pr operations)
        error_message: Error message (for failures)
        **kwargs: Additional metadata (never overrides the keys above)

    Returns:
        Updated batch state with git operation recorded

    Raises:
        ValueError: If operation is unknown or feature_index is out of range

    Examples:
        >>> state = load_batch_state(state_file)
        >>> state = record_git_operation(
        ...     state,
        ...     feature_index=0,
        ...     operation='commit',
        ...     success=True,
        ...     commit_sha='abc123',
        ...     branch='feature/test'
        ... )
        >>> save_batch_state(state_file, state)
    """
    # Reject unknown operations and out-of-range indices up front.
    valid_operations = ['commit', 'push', 'pr']
    if operation not in valid_operations:
        raise ValueError(f"Invalid operation: {operation}. Must be one of {valid_operations}")
    if not (0 <= feature_index < state.total_features):
        raise ValueError(f"Invalid feature_index: {feature_index} (total: {state.total_features})")

    # Base record; operation-specific fields are appended below in a
    # fixed order so the serialized layout stays stable.
    operation_record = {
        "success": success,
        "timestamp": datetime.utcnow().isoformat() + "Z",
    }
    if commit_sha:
        operation_record["sha"] = commit_sha
    if branch:
        operation_record["branch"] = branch
    if remote:
        operation_record["remote"] = remote
    if pr_number is not None:
        operation_record["number"] = pr_number
    if pr_url:
        operation_record["url"] = pr_url
    if error_message:
        operation_record["error"] = error_message

    # Merge extra metadata without clobbering the reserved keys above.
    for key, value in kwargs.items():
        operation_record.setdefault(key, value)

    # Store under the feature's per-operation map, creating it on first use.
    state.git_operations.setdefault(feature_index, {})[operation] = operation_record
    state.updated_at = datetime.utcnow().isoformat() + "Z"

    audit_log("git_operation_recorded", "info", {
        "batch_id": state.batch_id,
        "feature_index": feature_index,
        "operation": operation,
        "success": success,
    })

    return state
def get_feature_git_status(
    state: BatchState,
    feature_index: int
) -> Optional[Dict[str, Any]]:
    """
    Look up the recorded git operations for a feature.

    Args:
        state: Current batch state
        feature_index: Index of feature

    Returns:
        Mapping of operation name ('commit'/'push'/'pr') to its recorded
        result, or None when nothing has been recorded for the feature.

    Examples:
        >>> state = load_batch_state(state_file)
        >>> status = get_feature_git_status(state, 0)
        >>> if status:
        ...     commit = status.get('commit', {})
        ...     if commit.get('success'):
        ...         print(f"Commit: {commit['sha']}")
    """
    # .get returns None for features with no recorded operations.
    return state.git_operations.get(feature_index)
# =============================================================================
# BatchStateManager Class (Backward Compatibility Wrapper)
# =============================================================================
class BatchStateManager:
"""Object-oriented wrapper for batch state functions.
Provides backward compatibility for code expecting a class-based interface.
All methods delegate to the functional API defined above.
Examples:
>>> manager = BatchStateManager()
>>> state = manager.create_batch_state(["feature 1", "feature 2"])
>>> manager.save_batch_state(state)
>>> loaded = manager.load_batch_state()
"""
def __init__(self, state_file: Optional[Path] = None):
"""Initialize manager with optional custom state file path.
Args:
state_file: Optional custom path for state file.
If None, uses default (.claude/batch_state.json)
Path is validated for security (CWE-22, CWE-59)
Raises:
ValueError: If state_file contains path traversal or is outside project
"""
self.state_file = state_file if state_file else get_default_state_file()
# Validate path if provided (security requirement)
if state_file:
from security_utils import validate_path
self.state_file = validate_path(
Path(state_file),
"batch state file",
allow_missing=True
)
# Create parent directory if it doesn't exist
self.state_file.parent.mkdir(parents=True, exist_ok=True)
def create_batch_state(
self,
features: List[str],
batch_id: Optional[str] = None,
issue_numbers: Optional[List[int]] = None
) -> BatchState:
"""Create new batch state (delegates to create_batch_state function).
Args:
features: List of feature descriptions
batch_id: Optional custom batch ID
issue_numbers: Optional list of GitHub issue numbers
Returns:
BatchState object
"""
return create_batch_state(
features=features,
state_file=str(self.state_file),
batch_id=batch_id,
issue_numbers=issue_numbers
)
def create_batch(
self,
features: List[str],
features_file: Optional[str] = None,
batch_id: Optional[str] = None,
issue_numbers: Optional[List[int]] = None
) -> BatchState:
"""Create new batch state (alias for create_batch_state).
Args:
features: List of feature descriptions
features_file: Optional path to features file (for validation)
batch_id: Optional custom batch ID
issue_numbers: Optional list of GitHub issue numbers
Returns:
BatchState object
Note:
If features_file is provided, it is validated for security but not used
(features list is the actual source of truth)
"""
# Validate features_file if provided (security requirement)
if features_file:
from security_utils import validate_path
validate_path(Path(features_file), "features file", allow_missing=True)
return create_batch_state(
features=features,
state_file=str(self.state_file),
batch_id=batch_id,
issue_numbers=issue_numbers
)
def load_batch_state(self) -> BatchState:
"""Load batch state from file (delegates to load_batch_state function).
Returns:
BatchState object
"""
return load_batch_state(self.state_file)
def load_state(self) -> BatchState:
"""Alias for load_batch_state() for backward compatibility with tests.
Returns:
BatchState object
"""
return self.load_batch_state()
def save_batch_state(self, state: BatchState) -> None:
"""Save batch state to file (delegates to save_batch_state function).
Args:
state: BatchState object to save
"""
save_batch_state(self.state_file, state)
def save_state(self, state: BatchState) -> None:
"""Alias for save_batch_state() for backward compatibility with tests.
Args:
state: BatchState object to save
"""
self.save_batch_state(state)
def update_batch_progress(
self,
feature_index: int,
status: str,
tokens_consumed: int = 0
) -> None:
"""Update batch progress (delegates to update_batch_progress function).
Args:
feature_index: Index of completed feature
status: "completed" or "failed"
tokens_consumed: Estimated tokens consumed by this feature
"""
update_batch_progress(
self.state_file,
feature_index,
status,
tokens_consumed
)
def record_auto_clear_event(
self,
feature_index: int,
tokens_before_clear: int
) -> None:
"""Record auto-clear event (delegates to record_auto_clear_event function).
Args:
feature_index: Feature index when auto-clear triggered
tokens_before_clear: Estimated tokens before clearing
"""
record_auto_clear_event(
self.state_file,
feature_index,
tokens_before_clear
)
def should_auto_clear(self) -> bool:
"""Check if auto-clear should trigger (delegates to should_auto_clear function).
Returns:
True if context should be cleared
"""
state = self.load_batch_state()
return should_auto_clear(state)
def get_next_pending_feature(self) -> Optional[str]:
"""Get next pending feature (delegates to get_next_pending_feature function).
Returns:
Next feature description or None if all complete
"""
state = self.load_batch_state()
return get_next_pending_feature(state)
def cleanup_batch_state(self) -> None:
"""Cleanup batch state file (delegates to cleanup_batch_state function)."""
cleanup_batch_state(self.state_file)