#!/usr/bin/env python3
"""
Validation Utilities - Tracking infrastructure security validation

This module provides validation functions for tracking infrastructure:
- Session path validation (prevent path traversal)
- Agent name validation (alphanumeric, hyphen, underscore only)
- Message validation (length limits, no control characters)

Fixes Issue #79: Security validation for tracking infrastructure

Security Features:
- Path traversal prevention (CWE-22)
- Input sanitization
- Length limits (prevent resource exhaustion)
- Control character filtering

Usage:
    from validation import validate_session_path, validate_agent_name, validate_message

    # Validate session path
    safe_path = validate_session_path(user_path)

    # Validate agent name
    safe_name = validate_agent_name(name)

    # Validate message
    safe_msg = validate_message(message)

Date: 2025-11-17
Issue: GitHub #79 (Tracking infrastructure hardcoded paths)
Agent: implementer

Design Patterns:
    See library-design-patterns skill for standardized design patterns.
"""

import re
from pathlib import Path
from typing import Union

# Constants
MAX_MESSAGE_LENGTH = 10000  # Maximum message length, counted in characters
MAX_AGENT_NAME_LENGTH = 255  # Maximum length for agent names

# Agent names: ASCII letters, digits, underscore and hyphen only.
_AGENT_NAME_RE = re.compile(r'[a-zA-Z0-9_-]+')

# ASCII control characters 0-31 except tab (9), newline (10) and
# carriage return (13), which are allowed inside messages.
_CONTROL_CHAR_RE = re.compile(r'[\x00-\x08\x0b-\x0c\x0e-\x1f]')


def _is_within(candidate: Path, directory: Path) -> bool:
    """Return True if candidate is directory itself or lies beneath it."""
    try:
        # relative_to() raises ValueError when candidate is not under directory.
        candidate.relative_to(directory)
    except ValueError:
        return False
    return True


def validate_session_path(path: Union[str, Path], purpose: str = "session tracking") -> Path:
    """Validate a session path and return it in resolved, absolute form.

    Args:
        path: Path to validate (string or Path object).
        purpose: Description of what the path is for (used in error messages).

    Returns:
        The resolved absolute Path.

    Raises:
        TypeError: If path cannot be converted to a Path (for example None).
        ValueError: If the path contains a '..' sequence, its final component
            is a symlink, it cannot be resolved, the project root cannot be
            found, or it resolves outside the allowed directories.

    Security:
        - Prevents path traversal (CWE-22): any '..' substring is rejected
          and the resolved path must lie under an allowed directory.
        - Rejects a path whose final component is a symlink (CWE-59). Parent
          directories are not checked individually; the containment check on
          the resolved path covers escapes through them.
        - Allowed directories: PROJECT_ROOT/docs/sessions and
          PROJECT_ROOT/.claude.

    Examples:
        >>> path = validate_session_path("/project/docs/sessions/file.json")
        >>> path = validate_session_path("../../etc/passwd")  # Raises ValueError
    """
    # Normalise to Path; anything that is not path-like is a caller error and
    # is reported the same way the other validators report wrong types.
    try:
        path = Path(path)
    except TypeError:
        raise TypeError(
            f"Path must be str or Path for {purpose}, got {type(path).__name__}"
        ) from None

    # Check for obvious path traversal
    if ".." in str(path):
        raise ValueError(
            f"Path traversal detected in {purpose}: {path}\n"
            f"Paths cannot contain '..' sequences.\n"
            f"Expected: Absolute paths within PROJECT_ROOT"
        )

    # Reject symlinks BEFORE resolving (CWE-59): resolve() follows links, so
    # the check has to look at the path exactly as it was supplied.
    if path.is_symlink():
        raise ValueError(
            f"Symlinks not allowed (path outside project) for {purpose}: {path}\n"
            f"Symlinks can be used for path traversal attacks."
        )

    # Resolve to absolute path (handles relative paths)
    try:
        resolved_path = path.resolve()
    except (OSError, RuntimeError) as e:
        raise ValueError(
            f"Failed to resolve path for {purpose}: {path}\nError: {e}"
        ) from e

    # Imported here (not at module level) to avoid a circular dependency.
    from path_utils import get_project_root

    try:
        project_root = get_project_root()
    except FileNotFoundError as e:
        raise ValueError(f"Cannot validate path - project root not found: {e}") from e

    # Compare like with like: the candidate has been resolved, so the root
    # must be resolved too or a root reached through a symlink would make
    # every legitimate path look as if it were outside the project.
    try:
        project_root = project_root.resolve()
    except (OSError, RuntimeError) as e:
        raise ValueError(
            f"Failed to resolve project root for {purpose}: {project_root}\nError: {e}"
        ) from e

    allowed_dirs = [
        project_root / "docs" / "sessions",
        project_root / ".claude",
    ]

    if not any(_is_within(resolved_path, allowed_dir) for allowed_dir in allowed_dirs):
        raise ValueError(
            f"Path outside project for {purpose}: {path}\n"
            f"Resolved to: {resolved_path}\n"
            f"Allowed directories:\n"
            + "\n".join(f" - {d}" for d in allowed_dirs)
        )

    return resolved_path


def validate_agent_name(name: str, purpose: str = "agent tracking") -> str:
    """Validate agent name (alphanumeric, hyphen, underscore only).

    Args:
        name: Agent name to validate.
        purpose: Description of what the name is for (used in error messages).

    Returns:
        Validated agent name (stripped of surrounding whitespace).

    Raises:
        ValueError: If name is empty, too long, or contains invalid characters.
        TypeError: If name is not a string.

    Security:
        - Prevents injection attacks (only allows safe characters)
        - Length validation (prevents resource exhaustion)
        - No control characters
        - Rejected names are quoted with repr() in error messages, so the
          exception text never carries raw control characters

    Examples:
        >>> validate_agent_name("researcher")
        'researcher'
        >>> validate_agent_name("test-agent_v2")
        'test-agent_v2'
        >>> validate_agent_name("../../etc/passwd")  # Raises ValueError
        >>> validate_agent_name("")  # Raises ValueError
    """
    # Type check
    if not isinstance(name, str):
        raise TypeError(
            f"Agent name must be string for {purpose}, got {type(name).__name__}"
        )

    # Strip whitespace
    name = name.strip()

    # Empty check
    if not name:
        raise ValueError(
            f"Agent name cannot be empty for {purpose}\n"
            f"Expected: Non-empty string (alphanumeric, hyphen, underscore)"
        )

    # Length check
    if len(name) > MAX_AGENT_NAME_LENGTH:
        raise ValueError(
            f"Agent name too long for {purpose}: {len(name)} chars\n"
            f"Maximum: {MAX_AGENT_NAME_LENGTH} chars\n"
            f"Name: {name[:50]!r}..."
        )

    # Character validation (alphanumeric, hyphen, underscore only).
    # fullmatch() requires the whole name to consist of allowed characters.
    if not _AGENT_NAME_RE.fullmatch(name):
        raise ValueError(
            f"Invalid agent name for {purpose}: {name!r}\n"
            f"Agent names must contain only:\n"
            f" - Letters (a-z, A-Z)\n"
            f" - Numbers (0-9)\n"
            f" - Hyphens (-)\n"
            f" - Underscores (_)\n"
            f"Got: {name!r}"
        )

    return name


def validate_message(message: str, purpose: str = "message logging") -> str:
    """Validate message (length limits, no control characters).

    Args:
        message: Message to validate.
        purpose: Description of what the message is for (used in error messages).

    Returns:
        Validated message (stripped of leading/trailing whitespace).

    Raises:
        ValueError: If message is too long or contains control characters.
        TypeError: If message is not a string.

    Security:
        - Length validation (prevents resource exhaustion)
        - Control character filtering: ASCII 0-31 are rejected except tab,
          newline and carriage return, which remain allowed. Callers that
          write single-line log records must still escape line breaks.
        - Rejected content is quoted with repr() in error messages, so the
          exception text never carries raw control characters

    Examples:
        >>> validate_message("Research complete")
        'Research complete'
        >>> validate_message("x" * 20000)  # Raises ValueError (too long)
        >>> validate_message("Test\\x00message")  # Raises ValueError (control chars)
    """
    # Type check
    if not isinstance(message, str):
        raise TypeError(
            f"Message must be string for {purpose}, got {type(message).__name__}"
        )

    # Strip leading/trailing whitespace
    message = message.strip()

    # Length check (runs before the control-character check, so the preview
    # below may contain control characters and is therefore repr()-quoted)
    if len(message) > MAX_MESSAGE_LENGTH:
        raise ValueError(
            f"Message too long for {purpose}: {len(message)} chars\n"
            f"Maximum: {MAX_MESSAGE_LENGTH} chars (10KB)\n"
            f"Message: {message[:100]!r}..."
        )

    # Control character check (ASCII 0-31 except tab, newline, carriage return)
    # Allow: \t (9), \n (10), \r (13)
    # Reject: \x00-\x08, \x0b-\x0c, \x0e-\x1f
    control_chars = _CONTROL_CHAR_RE.findall(message)
    if control_chars:
        # Report each offending code point once, in ascending order.
        char_codes = sorted(set(map(ord, control_chars)))
        raise ValueError(
            f"Message contains control characters for {purpose}\n"
            f"Control characters found (ASCII codes): {char_codes}\n"
            f"These can be used for log injection attacks.\n"
            f"Message (first 100 chars): {message[:100]!r}"
        )

    return message