"""
Walk-forward analysis for backtesting.

This module implements walk-forward optimization to test strategy robustness
and detect overfitting by splitting data into in-sample and out-of-sample
periods.
"""

import itertools
import logging
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Callable, Tuple
from decimal import Decimal

import pandas as pd
import numpy as np
from tqdm import tqdm

from .config import BacktestConfig, WalkForwardConfig
from .performance import PerformanceMetrics
from .exceptions import OptimizationError

logger = logging.getLogger(__name__)

# Format used both to parse the caller's date strings and to hand dates
# to the backtest function.
_DATE_FORMAT = '%Y-%m-%d'

# Window lengths are configured in months; a month is approximated as 30 days.
_DAYS_PER_MONTH = 30

# Maps a configured optimization metric name to the PerformanceMetrics
# attribute that carries it. 'max_drawdown' is handled separately because
# its sign is flipped.
_METRIC_ATTRIBUTES = {
    'sharpe': 'sharpe_ratio',
    'sortino': 'sortino_ratio',
    'calmar': 'calmar_ratio',
    'return': 'annualized_return',
}


@dataclass
class WalkForwardWindow:
    """
    Represents a single walk-forward window.

    Attributes:
        window_id: Window identifier
        in_sample_start: In-sample start date
        in_sample_end: In-sample end date
        out_sample_start: Out-of-sample start date
        out_sample_end: Out-of-sample end date
        best_params: Best parameters from in-sample optimization
        in_sample_metrics: In-sample performance metrics
        out_sample_metrics: Out-of-sample performance metrics
    """

    window_id: int
    in_sample_start: datetime
    in_sample_end: datetime
    out_sample_start: datetime
    out_sample_end: datetime
    best_params: Optional[Dict[str, Any]] = None
    in_sample_metrics: Optional[PerformanceMetrics] = None
    out_sample_metrics: Optional[PerformanceMetrics] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary.

        Dates are rendered as 'YYYY-MM-DD' strings. Only the Sharpe ratio of
        each metrics object is exported; it is None while the corresponding
        metrics have not been set.
        """
        is_metrics = self.in_sample_metrics
        oos_metrics = self.out_sample_metrics
        return {
            'window_id': self.window_id,
            'in_sample_start': self.in_sample_start.strftime(_DATE_FORMAT),
            'in_sample_end': self.in_sample_end.strftime(_DATE_FORMAT),
            'out_sample_start': self.out_sample_start.strftime(_DATE_FORMAT),
            'out_sample_end': self.out_sample_end.strftime(_DATE_FORMAT),
            'best_params': self.best_params,
            'in_sample_sharpe': (
                is_metrics.sharpe_ratio if is_metrics is not None else None
            ),
            'out_sample_sharpe': (
                oos_metrics.sharpe_ratio if oos_metrics is not None else None
            ),
        }


@dataclass
class WalkForwardResults:
    """
    Results from walk-forward analysis.

    Attributes:
        windows: List of walk-forward windows
        combined_metrics: Combined out-of-sample metrics
        efficiency_ratio: Walk-forward efficiency ratio
        overfitting_score: Overfitting score (0-1, lower is better)
    """

    windows: List[WalkForwardWindow]
    combined_metrics: PerformanceMetrics
    efficiency_ratio: float
    overfitting_score: float

    def summary(self) -> pd.DataFrame:
        """Get summary DataFrame of all windows (one row per window)."""
        return pd.DataFrame([w.to_dict() for w in self.windows])

    def __str__(self) -> str:
        """Return a multi-line, human-readable report of the results."""
        lines = [
            "Walk-Forward Analysis Results",
            "=" * 60,
            f"Number of Windows: {len(self.windows)}",
            f"WF Efficiency Ratio: {self.efficiency_ratio:.2f}",
            f"Overfitting Score: {self.overfitting_score:.2f}",
            "",
            "Combined Out-of-Sample Metrics:",
            "-" * 60,
            f"Sharpe Ratio: {self.combined_metrics.sharpe_ratio:.2f}",
            f"Total Return: {self.combined_metrics.total_return:.2%}",
            f"Max Drawdown: {self.combined_metrics.max_drawdown:.2%}",
        ]
        return "\n".join(lines)


class WalkForwardAnalyzer:
    """
    Performs walk-forward analysis.

    This class splits the backtest period into multiple windows, optimizes
    parameters on in-sample data, and tests on out-of-sample data.
    """

    def __init__(self, wf_config: WalkForwardConfig):
        """
        Initialize walk-forward analyzer.

        Args:
            wf_config: Walk-forward configuration
        """
        self.config = wf_config
        logger.info("WalkForwardAnalyzer initialized")

    def analyze(
        self,
        backtest_func: Callable,
        param_grid: Dict[str, List[Any]],
        tickers: List[str],
        start_date: str,
        end_date: str,
        initial_capital: Decimal = Decimal("100000"),
    ) -> WalkForwardResults:
        """
        Perform walk-forward analysis.

        Args:
            backtest_func: Function that runs backtest with given parameters.
                Should have signature:
                (params, tickers, start, end, capital) -> (metrics, equity, trades)
                where start and end are 'YYYY-MM-DD' strings.
            param_grid: Dictionary of parameter names to lists of values
            tickers: List of tickers to test
            start_date: Overall start date ('YYYY-MM-DD')
            end_date: Overall end date ('YYYY-MM-DD')
            initial_capital: Initial capital

        Returns:
            WalkForwardResults

        Raises:
            OptimizationError: If the date range does not fit a single
                window, or if optimizing/testing any window fails.
        """
        logger.info("Starting walk-forward analysis")

        windows = self._generate_windows(start_date, end_date)
        logger.info("Generated %d walk-forward windows", len(windows))

        # Fail early with an actionable message instead of letting the
        # metric aggregation complain about missing out-of-sample metrics.
        if not windows:
            raise OptimizationError(
                f"Date range {start_date} to {end_date} is too short to fit "
                "a single walk-forward window"
            )

        for window in tqdm(windows, desc="Walk-forward windows"):
            try:
                # Optimize on in-sample data
                best_params, is_metrics = self._optimize_window(
                    backtest_func,
                    param_grid,
                    tickers,
                    window.in_sample_start,
                    window.in_sample_end,
                    initial_capital,
                )
                window.best_params = best_params
                window.in_sample_metrics = is_metrics

                # Test the winning parameters on out-of-sample data
                oos_metrics, _, _ = backtest_func(
                    best_params,
                    tickers,
                    window.out_sample_start.strftime(_DATE_FORMAT),
                    window.out_sample_end.strftime(_DATE_FORMAT),
                    initial_capital,
                )
                window.out_sample_metrics = oos_metrics

                logger.info(
                    "Window %s: IS Sharpe=%.2f, OOS Sharpe=%.2f",
                    window.window_id,
                    is_metrics.sharpe_ratio,
                    oos_metrics.sharpe_ratio,
                )
            except Exception as e:
                logger.error(
                    "Failed to process window %s: %s", window.window_id, e
                )
                # Chain the cause so the original traceback is preserved.
                raise OptimizationError(
                    f"Walk-forward analysis failed: {e}"
                ) from e

        results = WalkForwardResults(
            windows=windows,
            combined_metrics=self._combine_oos_metrics(windows),
            efficiency_ratio=self._calculate_efficiency_ratio(windows),
            overfitting_score=self._calculate_overfitting_score(windows),
        )

        logger.info("Walk-forward analysis complete")
        return results

    def _generate_windows(
        self,
        start_date: str,
        end_date: str,
    ) -> List[WalkForwardWindow]:
        """
        Generate walk-forward windows.

        Each window consists of an in-sample period immediately followed
        (one day later) by an out-of-sample period. Successive windows are
        shifted by ``step_months``. In rolling mode the whole window shifts;
        in anchored mode the in-sample start stays at ``start_date`` and only
        its end (and the out-of-sample period) shifts, so the in-sample
        period grows. Generation stops at the first window whose
        out-of-sample end would fall after ``end_date``.

        Args:
            start_date: Overall start date ('YYYY-MM-DD')
            end_date: Overall end date ('YYYY-MM-DD')

        Returns:
            List of windows with consecutive ids starting at 0; empty when
            not even one window fits in the date range.

        Raises:
            OptimizationError: If the configured step is not positive.
        """
        start = datetime.strptime(start_date, _DATE_FORMAT)
        end = datetime.strptime(end_date, _DATE_FORMAT)

        in_sample_span = timedelta(
            days=self.config.in_sample_months * _DAYS_PER_MONTH
        )
        out_sample_span = timedelta(
            days=self.config.out_sample_months * _DAYS_PER_MONTH
        )

        step_months = self.config.step_months
        # A non-positive step would generate the same window forever.
        if step_months is None or step_months <= 0:
            raise OptimizationError(
                f"step_months must be positive, got {step_months!r}"
            )
        step = timedelta(days=step_months * _DAYS_PER_MONTH)

        windows: List[WalkForwardWindow] = []
        offset = timedelta(0)

        while True:
            # The offset advances in both modes; anchored mode merely pins
            # the in-sample start. Without advancing, every anchored window
            # would be identical and the loop would never terminate.
            is_start = start if self.config.anchored else start + offset
            is_end = start + offset + in_sample_span

            oos_start = is_end + timedelta(days=1)
            oos_end = oos_start + out_sample_span

            if oos_end > end:
                break

            windows.append(
                WalkForwardWindow(
                    window_id=len(windows),
                    in_sample_start=is_start,
                    in_sample_end=is_end,
                    out_sample_start=oos_start,
                    out_sample_end=oos_end,
                )
            )
            offset += step

        return windows

    def _optimize_window(
        self,
        backtest_func: Callable,
        param_grid: Dict[str, List[Any]],
        tickers: List[str],
        start_date: datetime,
        end_date: datetime,
        initial_capital: Decimal,
    ) -> Tuple[Dict[str, Any], PerformanceMetrics]:
        """
        Optimize parameters for a single window.

        Runs the backtest once per parameter combination and keeps the
        combination with the highest optimization score. Combinations whose
        backtest raises are logged and skipped.

        Args:
            backtest_func: Backtest function
            param_grid: Parameter grid
            tickers: Tickers to test
            start_date: Start date
            end_date: End date
            initial_capital: Initial capital

        Returns:
            (best_params, best_metrics) tuple

        Raises:
            OptimizationError: If no combination produced a usable score.
        """
        # The window bounds are the same for every combination.
        start_str = start_date.strftime(_DATE_FORMAT)
        end_str = end_date.strftime(_DATE_FORMAT)

        best_params = None
        best_metrics = None
        best_score = float('-inf')

        for params in self._generate_param_combinations(param_grid):
            try:
                metrics, _, _ = backtest_func(
                    params,
                    tickers,
                    start_str,
                    end_str,
                    initial_capital,
                )
                score = self._get_optimization_score(metrics)
            except Exception as e:
                # Best-effort search: one bad combination must not abort
                # the whole optimization.
                logger.warning("Failed to test params %s: %s", params, e)
                continue

            if score > best_score:
                best_score = score
                best_params = params
                best_metrics = metrics

        if best_params is None:
            raise OptimizationError("No valid parameter combinations found")

        return best_params, best_metrics

    def _generate_param_combinations(
        self,
        param_grid: Dict[str, List[Any]]
    ) -> List[Dict[str, Any]]:
        """
        Generate all combinations of parameters.

        Args:
            param_grid: Dictionary of parameter names to lists of values

        Returns:
            One dict per element of the Cartesian product of the value
            lists; a single empty dict when the grid is empty.
        """
        if not param_grid:
            return [{}]

        keys = list(param_grid.keys())
        return [
            dict(zip(keys, combo))
            for combo in itertools.product(*param_grid.values())
        ]

    def _get_optimization_score(self, metrics: PerformanceMetrics) -> float:
        """
        Get optimization score based on configured metric.

        Higher is always better: 'max_drawdown' is negated so that a
        smaller drawdown yields a larger score. An unrecognized metric name
        falls back to the Sharpe ratio.
        """
        metric_name = self.config.optimization_metric
        if metric_name == 'max_drawdown':
            # Negative because we want to minimize
            return -metrics.max_drawdown

        # Only the selected attribute is read from the metrics object.
        attribute = _METRIC_ATTRIBUTES.get(metric_name, 'sharpe_ratio')
        return getattr(metrics, attribute)

    def _combine_oos_metrics(
        self, windows: List[WalkForwardWindow]
    ) -> PerformanceMetrics:
        """
        Combine out-of-sample metrics from all windows.

        This is a simplified combination - in practice, you'd want to
        concatenate the actual equity curves and recalculate. Ratios,
        returns and risk figures are averaged across windows, trade counts
        are summed, and best/worst trade take the extreme across windows.

        Raises:
            OptimizationError: If no window has out-of-sample metrics.
        """
        oos_metrics = [
            w.out_sample_metrics
            for w in windows
            if w.out_sample_metrics is not None
        ]

        if not oos_metrics:
            raise OptimizationError("No out-of-sample metrics available")

        def values(attribute: str) -> List[Any]:
            """Collect one attribute from every out-of-sample metrics object."""
            return [getattr(m, attribute) for m in oos_metrics]

        def mean_of(attribute: str):
            """Average one attribute across windows (simplified approach)."""
            return np.mean(values(attribute))

        return PerformanceMetrics(
            total_return=mean_of('total_return'),
            annualized_return=mean_of('annualized_return'),
            cumulative_return=mean_of('cumulative_return'),
            sharpe_ratio=mean_of('sharpe_ratio'),
            sortino_ratio=mean_of('sortino_ratio'),
            calmar_ratio=mean_of('calmar_ratio'),
            omega_ratio=mean_of('omega_ratio'),
            volatility=mean_of('volatility'),
            downside_deviation=mean_of('downside_deviation'),
            max_drawdown=mean_of('max_drawdown'),
            avg_drawdown=mean_of('avg_drawdown'),
            max_drawdown_duration=int(mean_of('max_drawdown_duration')),
            total_trades=sum(values('total_trades')),
            winning_trades=sum(values('winning_trades')),
            losing_trades=sum(values('losing_trades')),
            win_rate=mean_of('win_rate'),
            profit_factor=mean_of('profit_factor'),
            avg_win=mean_of('avg_win'),
            avg_loss=mean_of('avg_loss'),
            avg_trade=mean_of('avg_trade'),
            best_trade=max(values('best_trade')),
            worst_trade=min(values('worst_trade')),
        )

    def _calculate_efficiency_ratio(
        self, windows: List[WalkForwardWindow]
    ) -> float:
        """
        Calculate walk-forward efficiency ratio.

        This is the ratio of out-of-sample performance to in-sample
        performance. A ratio close to 1.0 indicates the strategy performs
        similarly in-sample and out-of-sample (good). A ratio much lower
        than 1.0 indicates overfitting.

        Returns:
            Mean out-of-sample score divided by mean in-sample score over
            the windows that have both metrics; 0.0 when there are no such
            windows or the mean in-sample score is zero.
        """
        scored = [
            w for w in windows
            if w.in_sample_metrics is not None
            and w.out_sample_metrics is not None
        ]
        if not scored:
            return 0.0

        avg_is_score = np.mean(
            [self._get_optimization_score(w.in_sample_metrics) for w in scored]
        )
        avg_oos_score = np.mean(
            [self._get_optimization_score(w.out_sample_metrics) for w in scored]
        )

        if avg_is_score == 0:
            return 0.0

        return float(avg_oos_score / avg_is_score)

    def _calculate_overfitting_score(
        self, windows: List[WalkForwardWindow]
    ) -> float:
        """
        Calculate overfitting score.

        This measures how much the performance degrades from in-sample to
        out-of-sample. Lower scores indicate less overfitting.

        Only windows with both metrics and a positive in-sample score
        contribute. Each window's relative degradation is floored at 0
        (out-of-sample outperformance does not count as negative
        overfitting) and the average is capped at 1.

        Returns value between 0 and 1 (0 = no overfitting, 1 = severe
        overfitting); 0.0 when no window contributes.
        """
        degradations = []

        for window in windows:
            if window.in_sample_metrics is None:
                continue
            if window.out_sample_metrics is None:
                continue

            is_score = self._get_optimization_score(window.in_sample_metrics)
            oos_score = self._get_optimization_score(window.out_sample_metrics)

            # A relative degradation is only meaningful against a positive
            # in-sample baseline.
            if is_score > 0:
                degradation = (is_score - oos_score) / is_score
                degradations.append(max(0, degradation))  # Clip at 0

        if not degradations:
            return 0.0

        # Average degradation, capped at 1
        return float(min(1.0, np.mean(degradations)))


def create_walk_forward_config(
    in_sample_months: int = 12,
    out_sample_months: int = 3,
    optimization_metric: str = "sharpe",
    anchored: bool = False,
) -> WalkForwardConfig:
    """
    Create a walk-forward configuration with sensible defaults.

    Args:
        in_sample_months: Months for training
        out_sample_months: Months for testing
        optimization_metric: Metric to optimize
        anchored: Whether to use anchored windows

    Returns:
        WalkForwardConfig
    """
    return WalkForwardConfig(
        in_sample_months=in_sample_months,
        out_sample_months=out_sample_months,
        optimization_metric=optimization_metric,
        anchored=anchored,
    )