TradingAgents/.claude/lib/path_utils.py

376 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Path Utilities - Centralized project root detection and path resolution
This module provides centralized path resolution for tracking infrastructure:
- Dynamic PROJECT_ROOT detection (searches for .git/ or .claude/)
- Session directory resolution
- Batch state file resolution
- Directory creation with proper permissions
Fixes Issue #79: Hardcoded paths in tracking infrastructure
Security Features:
- All paths resolve from PROJECT_ROOT (not current working directory)
- Works from any subdirectory
- Creates directories with explicit permissions (0o755 for .claude/, 0o700 for docs/sessions/)
- No hardcoded relative paths
Usage:
from path_utils import get_project_root, get_session_dir, get_batch_state_file
# Get project root
root = get_project_root()
# Get session directory (creates if missing)
session_dir = get_session_dir()
# Get batch state file path
state_file = get_batch_state_file()
Date: 2025-11-17
Issue: GitHub #79 (Tracking infrastructure hardcoded paths)
Agent: implementer
Design Patterns:
See library-design-patterns skill for standardized design patterns.
"""
import json
from pathlib import Path
from typing import Optional, List
# Cache for project root (avoid repeated filesystem searches).
# Filled by get_project_root() and cleared by reset_project_root_cache();
# None means "not detected yet".
_PROJECT_ROOT_CACHE: Optional[Path] = None
# Cache for policy file (avoid repeated filesystem searches).
# Filled by get_policy_file() and cleared by reset_policy_cache();
# None means "not resolved yet".
_POLICY_FILE_CACHE: Optional[Path] = None
class PolicyFileNotFoundError(Exception):
    """Signal that no policy file could be located in any lookup location.

    NOTE(review): the lookup helpers in this module return a fallback path
    rather than raising this exception - confirm where it is raised.
    """
def find_project_root(
    marker_files: Optional[List[str]] = None,
    start_path: Optional[Path] = None
) -> Path:
    """Locate the project root by walking upward until a marker is found.

    The search is marker-major: the first marker is looked for in the
    starting directory and every ancestor up to the filesystem root before
    the second marker is considered at all. With the default markers this
    means a ``.git`` anywhere above the start wins over a closer ``.claude``.

    Args:
        marker_files: Marker file/directory names in priority order.
            Defaults to [".git", ".claude"].
        start_path: Directory to start from. Defaults to the current
            working directory.

    Returns:
        The directory that contains the first marker found.

    Raises:
        FileNotFoundError: If none of the markers exists in the starting
            directory or any of its ancestors.

    Examples:
        >>> root = find_project_root()  # Search from cwd
        >>> root = find_project_root(start_path=Path("/path/to/nested/dir"))
        >>> root = find_project_root(marker_files=[".git", "setup.py"])

    Security:
        - Only inspects the start directory and its ancestors (never descends)
        - The ancestor chain is finite, so the search always terminates
        - A directory is returned only after its marker is seen to exist
    """
    markers = [".git", ".claude"] if marker_files is None else marker_files
    origin = Path.cwd() if start_path is None else start_path

    # resolve() gives an absolute path with symlinks followed, so the
    # ancestor chain below runs all the way to the filesystem root.
    resolved_origin = origin.resolve()
    search_chain = (resolved_origin, *resolved_origin.parents)

    # Outer loop over markers (priority), inner loop over directories
    # (nearest first): a higher-priority marker further up still wins.
    for marker_name in markers:
        for directory in search_chain:
            if (directory / marker_name).exists():
                return directory

    # Every marker was tried against every directory without success.
    raise FileNotFoundError(
        f"Could not find project root. Searched upward from {origin} "
        f"looking for: {', '.join(markers)}. "
        f"Ensure you're running from within a git repository or have .claude/PROJECT.md"
    )
def get_project_root(use_cache: bool = True) -> Path:
    """Return the project root, detecting it at most once unless told otherwise.

    The detected root is stored in the module-level ``_PROJECT_ROOT_CACHE``
    so later calls skip the filesystem search.

    Args:
        use_cache: When True (default), reuse a previously detected root if
            there is one. When False, always re-run detection and overwrite
            the cached value (useful in tests that change working directory).

    Returns:
        Path to project root

    Raises:
        FileNotFoundError: If no project root found

    Examples:
        >>> root = get_project_root()
        >>> session_dir = root / "docs" / "sessions"

        # In tests that change cwd
        >>> root = get_project_root(use_cache=False)

    Thread Safety:
        Not thread-safe (uses an unguarded module-level cache). If needed
        for multi-threading, wrap with threading.Lock.
    """
    global _PROJECT_ROOT_CACHE

    cache_usable = use_cache and _PROJECT_ROOT_CACHE is not None
    if not cache_usable:
        # Either the caller asked for a fresh lookup or nothing is cached yet.
        _PROJECT_ROOT_CACHE = find_project_root()

    return _PROJECT_ROOT_CACHE
def get_session_dir(create: bool = True, use_cache: bool = True) -> Path:
    """Get session directory path (PROJECT_ROOT/docs/sessions).

    Args:
        create: If True, create the directory when it doesn't exist
            (default: True). An already existing directory is returned
            as-is; its permissions are not modified.
        use_cache: If True, use cached project root (default). Set False in
            tests that change working directory.

    Returns:
        Path to session directory

    Raises:
        FileNotFoundError: If project root not found
        OSError: If directory creation or the permission change fails

    Examples:
        >>> session_dir = get_session_dir()
        >>> session_file = session_dir / "20251117-session.md"

        # In tests that change cwd
        >>> session_dir = get_session_dir(use_cache=False)

    Security:
        - A newly created directory gets owner-only permissions
          (0o700 = rwx------) from the moment it is created
        - Path is always derived from get_project_root(), never from cwd
    """
    project_root = get_project_root(use_cache=use_cache)
    session_dir = project_root / "docs" / "sessions"

    if create and not session_dir.exists():
        # Request 0o700 at creation time so the directory is never visible
        # with broader, umask-default permissions. mode= applies to the leaf
        # only; intermediate parents get the default permissions.
        session_dir.mkdir(mode=0o700, parents=True, exist_ok=True)
        # mkdir's mode is masked by the process umask, so set the exact
        # owner-only bits explicitly afterwards.
        session_dir.chmod(0o700)  # rwx------

    return session_dir
def get_batch_state_file(use_cache: bool = True) -> Path:
    """Get batch state file path (PROJECT_ROOT/.claude/batch_state.json).

    Note: Does NOT create the file (only returns path).
    Directory (.claude/) is created if it doesn't exist.

    Args:
        use_cache: If True, use cached project root (default). Set False in
            tests that change working directory. Mirrors the parameter of
            get_session_dir() and get_policy_file().

    Returns:
        Path to batch state file

    Raises:
        FileNotFoundError: If project root not found
        OSError: If directory creation fails

    Examples:
        >>> state_file = get_batch_state_file()

        # In tests that change cwd
        >>> state_file = get_batch_state_file(use_cache=False)

    Security:
        - Creates the .claude/ directory with requested mode 0o755
          (the effective mode is still subject to the process umask)
        - Path is always derived from get_project_root(), never from cwd
    """
    project_root = get_project_root(use_cache=use_cache)
    claude_dir = project_root / ".claude"

    # Create .claude/ directory if missing; exist_ok makes this a no-op
    # when the directory is already there.
    claude_dir.mkdir(parents=True, exist_ok=True, mode=0o755)

    return claude_dir / "batch_state.json"
def reset_project_root_cache() -> None:
    """Forget the cached project root so the next lookup searches again.

    Sets the module-level ``_PROJECT_ROOT_CACHE`` back to None.

    Warning: Intended for test teardown only. In production the cache is
    meant to live for the whole process.

    Examples:
        >>> # In test teardown
        >>> reset_project_root_cache()
    """
    global _PROJECT_ROOT_CACHE
    _PROJECT_ROOT_CACHE = None
def get_policy_file(use_cache: bool = True) -> Path:
    """Get policy file path via cascading lookup with fallback.

    Cascading lookup order (relative to the project root):
        1. .claude/config/auto_approve_policy.json (project-local)
        2. plugins/autonomous-dev/config/auto_approve_policy.json (plugin default)
        3. Fallback path when neither candidate validates (may not exist)

    A candidate is accepted only if it is not a symlink (CWE-59), is a
    regular file, can be read, and contains valid JSON.

    Args:
        use_cache: If True, use cached value (default). If False, force
            re-detection of both the policy file and the project root it is
            resolved against. Set to False in tests that change working
            directory.

    Returns:
        Path to the policy file. When no candidate passes validation this is
        a fallback path that may not exist or may have failed validation, so
        callers must handle a missing or unusable file.

    Examples:
        >>> policy_file = get_policy_file()

        # In tests that change cwd
        >>> policy_file = get_policy_file(use_cache=False)

    Thread Safety:
        Not thread-safe (uses an unguarded module-level cache). If needed
        for multi-threading, wrap with threading.Lock.

    Note:
        Project-local policy takes priority over the plugin default, which
        allows per-project customization.
    """
    global _POLICY_FILE_CACHE
    if not use_cache or _POLICY_FILE_CACHE is None:
        # Forward use_cache so a forced refresh also re-detects the project
        # root instead of resolving against a stale cached root.
        _POLICY_FILE_CACHE = _find_policy_file(use_cache=use_cache)
    return _POLICY_FILE_CACHE


def _find_policy_file(use_cache: bool = True) -> Path:
    """Find policy file via cascading lookup.

    Internal implementation for get_policy_file().

    Args:
        use_cache: Passed through to get_project_root(); False forces the
            project root to be re-detected.

    Returns:
        The first candidate that passes _is_valid_policy_file(); otherwise
        the first candidate with nothing at its path (neither a file nor a
        symlink); otherwise the project-local candidate. If no project root
        can be found, config/auto_approve_policy.json under the parent of
        this module's directory.
    """
    try:
        project_root = get_project_root(use_cache=use_cache)
    except FileNotFoundError:
        # No project root found - return the path relative to this module
        # (may not exist, but that's okay - caller handles missing file)
        return Path(__file__).parent.parent / "config" / "auto_approve_policy.json"

    # Candidate locations in priority order
    locations = [
        project_root / ".claude" / "config" / "auto_approve_policy.json",  # Project-local
        project_root / "plugins" / "autonomous-dev" / "config" / "auto_approve_policy.json",  # Plugin default
    ]

    # First candidate that passes validation wins
    for policy_path in locations:
        if _is_valid_policy_file(policy_path):
            return policy_path

    # No valid policy found. Prefer a location where nothing exists at all.
    # exists() follows symlinks and reports False for a dangling one, so the
    # symlink check is needed to avoid handing back a rejected symlink path.
    for policy_path in locations:
        if not policy_path.exists() and not policy_path.is_symlink():
            return policy_path

    # Every location is occupied by something that was rejected (symlink,
    # invalid JSON, etc.). Return project-local as last resort; the caller
    # has to deal with the unusable file.
    return locations[0]
def _is_valid_policy_file(policy_path: Path) -> bool:
    """Validate policy file for security and format.

    Checks:
        - Is not a symlink (CWE-59)
        - Exists and is a regular file (not a directory)
        - Is readable
        - Decodes as UTF-8 and contains valid JSON

    Never raises for an unreadable, undecodable, or malformed file; all of
    those cases yield False.

    Args:
        policy_path: Path to validate

    Returns:
        True if valid, False otherwise
    """
    # Check symlink FIRST: the checks below follow symlinks, so a link must
    # be rejected before they run
    # (CWE-59: Improper Link Resolution Before File Access).
    if policy_path.is_symlink():
        return False

    # is_file() is False for missing paths and for directories alike.
    if not policy_path.is_file():
        return False

    try:
        # Decode explicitly as UTF-8 rather than the locale default so the
        # result does not depend on the host's locale settings.
        with open(policy_path, "r", encoding="utf-8") as f:
            json.load(f)
    except (OSError, ValueError):
        # OSError covers permission and other IO errors; ValueError covers
        # both json.JSONDecodeError and UnicodeDecodeError (binary or
        # non-UTF-8 content).
        return False
    return True
def reset_policy_cache() -> None:
    """Forget the cached policy file path so the next lookup resolves again.

    Sets the module-level ``_POLICY_FILE_CACHE`` back to None.

    Warning: Intended for test teardown only. In production the cache is
    meant to live for the whole process.

    Examples:
        >>> # In test teardown
        >>> reset_policy_cache()
    """
    global _POLICY_FILE_CACHE
    _POLICY_FILE_CACHE = None