376 lines
12 KiB
Python
376 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Path Utilities - Centralized project root detection and path resolution
|
|
|
|
This module provides centralized path resolution for tracking infrastructure:
|
|
- Dynamic PROJECT_ROOT detection (searches for .git/ or .claude/)
|
|
- Session directory resolution
|
|
- Batch state file resolution
|
|
- Directory creation with proper permissions
|
|
|
|
Fixes Issue #79: Hardcoded paths in tracking infrastructure
|
|
|
|
Security Features:
|
|
- All paths resolve from PROJECT_ROOT (not current working directory)
|
|
- Works from any subdirectory
|
|
- Creates directories with explicit permissions (0o700 for docs/sessions, 0o755 for .claude/)
|
|
- No hardcoded relative paths
|
|
|
|
Usage:
|
|
from path_utils import get_project_root, get_session_dir, get_batch_state_file
|
|
|
|
# Get project root
|
|
root = get_project_root()
|
|
|
|
# Get session directory (creates if missing)
|
|
session_dir = get_session_dir()
|
|
|
|
# Get batch state file path
|
|
state_file = get_batch_state_file()
|
|
|
|
Date: 2025-11-17
|
|
Issue: GitHub #79 (Tracking infrastructure hardcoded paths)
|
|
Agent: implementer
|
|
|
|
Design Patterns:
|
|
See library-design-patterns skill for standardized design patterns.
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Optional, List
|
|
|
|
|
|
# Memoized project root; None until get_project_root() performs its first
# successful detection, and again after reset_project_root_cache().
_PROJECT_ROOT_CACHE: Optional[Path] = None

# Memoized policy file path; None until get_policy_file() performs its first
# lookup, and again after reset_policy_cache().
_POLICY_FILE_CACHE: Optional[Path] = None
|
|
|
|
|
|
class PolicyFileNotFoundError(Exception):
    """Signal that a policy file could not be located in any searched location."""
|
|
|
|
|
|
def find_project_root(
    marker_files: Optional[List[str]] = None,
    start_path: Optional[Path] = None
) -> Path:
    """Locate the project root by walking upward until a marker is found.

    The walk begins at ``start_path`` (or the current working directory when
    it is omitted) and visits that directory followed by each of its
    ancestors up to the filesystem root.

    Markers are tried strictly in priority order: the whole ancestor chain is
    scanned for the first marker before the second marker is considered at
    all. With the default markers this means a ``.git`` entry anywhere above
    the start wins over a ``.claude`` entry, even one that is closer to the
    start directory.

    Args:
        marker_files: File or directory names that identify a project root,
            in priority order. Defaults to ``[".git", ".claude"]``.
        start_path: Directory to start searching from. Defaults to the
            current working directory.

    Returns:
        The first directory in the chain that contains the highest-priority
        marker found.

    Raises:
        FileNotFoundError: If none of the markers exists in the start
            directory or any of its ancestors.

    Examples:
        >>> root = find_project_root()  # Search from cwd
        >>> root = find_project_root(start_path=Path("/path/to/nested/dir"))
        >>> root = find_project_root(marker_files=[".git", "setup.py"])
    """
    markers = [".git", ".claude"] if marker_files is None else marker_files
    origin = Path.cwd() if start_path is None else start_path

    # Absolute, symlink-free starting point; its ``parents`` sequence ends at
    # the filesystem root, so the scan is always finite.
    base = origin.resolve()
    search_chain = [base, *base.parents]

    # Outer loop over markers (priority), inner loop over directories
    # (nearest first): a higher-priority marker far up the tree still beats a
    # lower-priority marker close to the start.
    for marker in markers:
        for candidate in search_chain:
            if (candidate / marker).exists():
                return candidate

    raise FileNotFoundError(
        f"Could not find project root. Searched upward from {origin} "
        f"looking for: {', '.join(markers)}. "
        f"Ensure you're running from within a git repository or have .claude/PROJECT.md"
    )
|
|
|
|
|
|
def get_project_root(use_cache: bool = True) -> Path:
    """Return the project root, memoizing the result at module level.

    The first call runs ``find_project_root()`` and stores the answer in
    ``_PROJECT_ROOT_CACHE``; later calls return the stored value without
    touching the filesystem.

    Args:
        use_cache: When True (default), return the stored value if there is
            one. When False, always re-run detection and overwrite the stored
            value, e.g. in tests that change the working directory.

    Returns:
        Path to the project root.

    Raises:
        FileNotFoundError: Propagated from ``find_project_root()`` when no
            marker is found; the stored value is left untouched in that case.

    Examples:
        >>> root = get_project_root()
        >>> session_dir = root / "docs" / "sessions"

        # In tests that change cwd
        >>> root = get_project_root(use_cache=False)

    Thread Safety:
        Not thread-safe (reads and writes a module-level variable without a
        lock). Wrap calls in a ``threading.Lock`` if used from several threads.
    """
    global _PROJECT_ROOT_CACHE

    # Fast path: caching requested and a value has already been stored.
    remembered = _PROJECT_ROOT_CACHE
    if use_cache and remembered is not None:
        return remembered

    # Either the caller asked for a fresh lookup or nothing is stored yet.
    _PROJECT_ROOT_CACHE = find_project_root()
    return _PROJECT_ROOT_CACHE
|
|
|
|
|
|
def get_session_dir(create: bool = True, use_cache: bool = True) -> Path:
    """Get session directory path (PROJECT_ROOT/docs/sessions).

    Args:
        create: If True (default), create the directory when it does not
            exist yet. If False, only compute and return the path.
        use_cache: If True (default), use the cached project root. Set to
            False in tests that change the working directory.

    Returns:
        Path to the session directory. It is only guaranteed to exist when
        ``create`` is True.

    Raises:
        FileNotFoundError: If the project root cannot be found.
        OSError: If directory creation or the permission change fails.

    Examples:
        >>> session_dir = get_session_dir()
        >>> session_file = session_dir / "20251117-session.md"

        # In tests that change cwd
        >>> session_dir = get_session_dir(use_cache=False)

    Security:
        - A newly created directory is requested with mode 0o700 (rwx------)
          and then chmod-ed to exactly 0o700, so it is never left with wider
          permissions between creation and the chmod.
        - Missing parent directories (e.g. ``docs/``) are created with the
          platform default permissions; only the leaf directory is restricted.
        - A directory that already exists is returned as-is; its permissions
          are not modified.
        - The path is always derived from get_project_root(), never from the
          current working directory.
    """
    project_root = get_project_root(use_cache=use_cache)
    session_dir = project_root / "docs" / "sessions"

    if create and not session_dir.exists():
        # Passing mode= closes the window in which the directory would exist
        # with umask-default (typically world-readable) permissions.
        session_dir.mkdir(mode=0o700, parents=True, exist_ok=True)
        # mkdir's mode is masked by the process umask (and skipped if another
        # process created the directory first), so pin the final value.
        session_dir.chmod(0o700)  # rwx------

    return session_dir
|
|
|
|
|
|
def get_batch_state_file(use_cache: bool = True) -> Path:
    """Get batch state file path (PROJECT_ROOT/.claude/batch_state.json).

    Note: Does NOT create the file (only returns its path). The containing
    ``.claude/`` directory is created if it doesn't exist.

    Args:
        use_cache: If True (default), use the cached project root. Set to
            False in tests that change the working directory. Mirrors the
            parameter of the same name on get_session_dir().

    Returns:
        Path to the batch state file (the file itself may not exist).

    Raises:
        FileNotFoundError: If the project root cannot be found.
        OSError: If directory creation fails.

    Examples:
        >>> state_file = get_batch_state_file()
        >>> from batch_state_manager import save_batch_state
        >>> save_batch_state(state_file, state)

    Security:
        - ``.claude/`` is created with a requested mode of 0o755 (the
          effective mode is additionally restricted by the process umask).
        - The path is always derived from get_project_root(), never from the
          current working directory.
    """
    project_root = get_project_root(use_cache=use_cache)
    claude_dir = project_root / ".claude"

    # Create .claude/ if missing; exist_ok makes repeated calls harmless.
    claude_dir.mkdir(parents=True, exist_ok=True, mode=0o755)

    return claude_dir / "batch_state.json"
|
|
|
|
|
|
def reset_project_root_cache() -> None:
    """Forget the memoized project root (intended for tests).

    Sets the module-level ``_PROJECT_ROOT_CACHE`` back to None so that the
    next cached lookup performs a fresh filesystem search.

    Warning: Meant for test setup/teardown. In production the cached value is
    expected to live as long as the process.

    Examples:
        >>> # In test teardown
        >>> reset_project_root_cache()
    """
    global _PROJECT_ROOT_CACHE
    _PROJECT_ROOT_CACHE = None
|
|
|
|
|
|
def get_policy_file(use_cache: bool = True) -> Path:
    """Get policy file path via cascading lookup with fallback.

    Cascading lookup order (relative to the project root):
        1. .claude/config/auto_approve_policy.json (project-local)
        2. plugins/autonomous-dev/config/auto_approve_policy.json (plugin default)
        3. A fallback path that may not exist (see ``_find_policy_file``)

    A candidate from steps 1-2 is only accepted when
    ``_is_valid_policy_file`` approves it.

    Args:
        use_cache: If True (default), return the cached path when one is
            stored. If False, force a fresh lookup; this also forces the
            project root to be re-detected, so the result reflects the
            current working directory. Set to False in tests that change the
            working directory.

    Returns:
        Path to the policy file. The path is validated when it comes from
        lookup steps 1-2; a fallback path is NOT validated and may not exist,
        so callers must be prepared for a missing or unusable file.

    Examples:
        >>> policy_file = get_policy_file()
        >>> validator = ToolValidator(policy_file=policy_file)

        # In tests that change cwd
        >>> policy_file = get_policy_file(use_cache=False)

    Thread Safety:
        Not thread-safe (uses a module-level cache without a lock). If needed
        for multi-threading, wrap with threading.Lock.

    Note:
        Project-local policy takes priority over the plugin default, which
        enables per-project customization while keeping a shared default.
    """
    global _POLICY_FILE_CACHE

    if not use_cache or _POLICY_FILE_CACHE is None:
        # Forward use_cache so a forced refresh does not resolve the policy
        # against a stale, previously cached project root.
        _POLICY_FILE_CACHE = _find_policy_file(use_cache=use_cache)

    return _POLICY_FILE_CACHE


def _find_policy_file(use_cache: bool = True) -> Path:
    """Find policy file via cascading lookup.

    Internal implementation for get_policy_file().

    Args:
        use_cache: Passed through to get_project_root(); False forces the
            project root to be re-detected before the lookup.

    Returns:
        The first candidate accepted by ``_is_valid_policy_file``. If none is
        accepted: the first candidate that is absent from disk (neither a
        file nor a symlink), or, when every candidate is present but
        rejected, the project-local candidate. If no project root can be
        found at all, a path next to this module's parent directory
        (``<module dir>/../config/auto_approve_policy.json``) is returned
        without any existence check.
    """
    try:
        project_root = get_project_root(use_cache=use_cache)
    except FileNotFoundError:
        # No project root found - return the plugin default path relative to
        # this file. Its existence is not checked here.
        plugin_path = Path(__file__).parent.parent / "config" / "auto_approve_policy.json"
        return plugin_path

    # Cascading lookup locations, highest priority first.
    locations = [
        project_root / ".claude" / "config" / "auto_approve_policy.json",  # Project-local
        project_root / "plugins" / "autonomous-dev" / "config" / "auto_approve_policy.json",  # Plugin default
    ]

    # Try each location in priority order.
    for policy_path in locations:
        if _is_valid_policy_file(policy_path):
            return policy_path

    # No valid policy found - prefer a location that is simply absent.
    # exists() follows symlinks and reports False for a dangling one, so the
    # explicit is_symlink() check is required to avoid handing back a symlink
    # that validation rejected.
    for policy_path in locations:
        if not policy_path.is_symlink() and not policy_path.exists():
            return policy_path

    # All locations are present but all were rejected (symlinks, invalid
    # JSON, etc.). Return project-local as last resort; the path is NOT valid.
    return locations[0]
|
|
|
|
|
|
def _is_valid_policy_file(policy_path: Path) -> bool:
    """Validate policy file for security and format.

    Checks, in order:
        - Is not a symlink (CWE-59)
        - Exists and is a regular file (not a directory)
        - Is readable
        - Decodes as UTF-8 and contains valid JSON

    Only the JSON syntax is checked; the parsed content is discarded and no
    schema is enforced.

    Args:
        policy_path: Path to validate.

    Returns:
        True if every check passes, False otherwise. Unreadable files,
        files that are not valid UTF-8, and malformed JSON all yield False
        rather than raising.
    """
    # Check symlink FIRST: is_file()/exists() follow symlinks, so they would
    # report on the link target instead of the link itself.
    # Reject symlinks (CWE-59: Improper Link Resolution Before File Access).
    if policy_path.is_symlink():
        return False

    # Must exist and be a regular file (is_file() is False for missing paths
    # and for directories).
    if not policy_path.is_file():
        return False

    # Check readability and validate JSON. The encoding is pinned to UTF-8 so
    # the verdict does not depend on the platform's locale encoding.
    try:
        with open(policy_path, "r", encoding="utf-8") as f:
            json.load(f)
    except (OSError, ValueError):
        # OSError covers PermissionError and other I/O failures; ValueError
        # covers json.JSONDecodeError and UnicodeDecodeError (binary or
        # non-UTF-8 content), which previously escaped as an exception.
        return False

    return True
|
|
|
|
|
|
def reset_policy_cache() -> None:
    """Forget the memoized policy file path (intended for tests).

    Sets the module-level ``_POLICY_FILE_CACHE`` back to None so that the
    next cached lookup runs the policy file search again.

    Warning: Meant for test setup/teardown. In production the cached value is
    expected to live as long as the process.

    Examples:
        >>> # In test teardown
        >>> reset_policy_cache()
    """
    global _POLICY_FILE_CACHE
    _POLICY_FILE_CACHE = None
|