497 lines
15 KiB
Python
497 lines
15 KiB
Python
"""
|
|
Monte Carlo simulation for backtesting.
|
|
|
|
This module implements Monte Carlo methods to assess the distribution of
|
|
potential outcomes and confidence intervals for backtest results.
|
|
"""
|
|
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, List, Optional, Any, Tuple
|
|
from decimal import Decimal
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
|
|
from .config import MonteCarloConfig
|
|
from .exceptions import MonteCarloError
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
|
|
class MonteCarloResults:
|
|
"""
|
|
Results from Monte Carlo simulation.
|
|
|
|
Attributes:
|
|
n_simulations: Number of simulations run
|
|
mean_final_value: Mean final portfolio value
|
|
median_final_value: Median final portfolio value
|
|
std_final_value: Standard deviation of final values
|
|
confidence_intervals: Confidence intervals for final value
|
|
worst_case: Worst case final value
|
|
best_case: Best case final value
|
|
probability_of_profit: Probability of positive return
|
|
simulated_paths: Sample of simulated equity curves
|
|
percentiles: Percentiles of final values
|
|
"""
|
|
n_simulations: int
|
|
mean_final_value: float
|
|
median_final_value: float
|
|
std_final_value: float
|
|
confidence_intervals: Dict[float, Tuple[float, float]]
|
|
worst_case: float
|
|
best_case: float
|
|
probability_of_profit: float
|
|
simulated_paths: Optional[pd.DataFrame] = None
|
|
percentiles: Dict[int, float] = field(default_factory=dict)
|
|
|
|
def __str__(self) -> str:
|
|
"""String representation."""
|
|
lines = [
|
|
"Monte Carlo Simulation Results",
|
|
"=" * 60,
|
|
f"Simulations: {self.n_simulations:,}",
|
|
f"Mean Final Value: ${self.mean_final_value:,.2f}",
|
|
f"Median Final Value: ${self.median_final_value:,.2f}",
|
|
f"Std Dev: ${self.std_final_value:,.2f}",
|
|
f"Probability of Profit: {self.probability_of_profit:.2%}",
|
|
"",
|
|
"Confidence Intervals:",
|
|
"-" * 60,
|
|
]
|
|
|
|
for level, (lower, upper) in sorted(self.confidence_intervals.items()):
|
|
lines.append(f"{level:.0%}: ${lower:,.2f} - ${upper:,.2f}")
|
|
|
|
lines.extend([
|
|
"",
|
|
"Extreme Cases:",
|
|
"-" * 60,
|
|
f"Best Case: ${self.best_case:,.2f}",
|
|
f"Worst Case: ${self.worst_case:,.2f}",
|
|
])
|
|
|
|
return "\n".join(lines)
|
|
|
|
|
|
class MonteCarloSimulator:
|
|
"""
|
|
Performs Monte Carlo simulations on backtest results.
|
|
|
|
This class uses various resampling methods to generate distributions
|
|
of potential outcomes and assess risk.
|
|
"""
|
|
|
|
def __init__(self, config: MonteCarloConfig):
|
|
"""
|
|
Initialize Monte Carlo simulator.
|
|
|
|
Args:
|
|
config: Monte Carlo configuration
|
|
"""
|
|
self.config = config
|
|
|
|
# Set random seed for reproducibility
|
|
if config.random_seed is not None:
|
|
np.random.seed(config.random_seed)
|
|
|
|
logger.info(f"MonteCarloSimulator initialized with {config.n_simulations} simulations")
|
|
|
|
def simulate(
|
|
self,
|
|
equity_curve: pd.Series,
|
|
trades: Optional[pd.DataFrame] = None,
|
|
initial_value: Optional[float] = None,
|
|
) -> MonteCarloResults:
|
|
"""
|
|
Run Monte Carlo simulation.
|
|
|
|
Args:
|
|
equity_curve: Historical equity curve
|
|
trades: DataFrame with trade information (required for trade resampling)
|
|
initial_value: Initial portfolio value (default: first value in equity_curve)
|
|
|
|
Returns:
|
|
MonteCarloResults
|
|
|
|
Raises:
|
|
MonteCarloError: If simulation fails
|
|
"""
|
|
logger.info(f"Running Monte Carlo simulation: {self.config.method}")
|
|
|
|
if initial_value is None:
|
|
initial_value = float(equity_curve.iloc[0])
|
|
|
|
try:
|
|
if self.config.method == 'resample_returns':
|
|
simulated_values = self._resample_returns(equity_curve, initial_value)
|
|
|
|
elif self.config.method == 'resample_trades':
|
|
if trades is None or trades.empty:
|
|
raise MonteCarloError("Trades data required for trade resampling")
|
|
simulated_values = self._resample_trades(trades, initial_value)
|
|
|
|
elif self.config.method == 'parametric':
|
|
simulated_values = self._parametric_simulation(equity_curve, initial_value)
|
|
|
|
else:
|
|
raise MonteCarloError(f"Unknown simulation method: {self.config.method}")
|
|
|
|
# Calculate statistics
|
|
results = self._calculate_statistics(simulated_values, initial_value)
|
|
|
|
logger.info("Monte Carlo simulation complete")
|
|
return results
|
|
|
|
except Exception as e:
|
|
raise MonteCarloError(f"Monte Carlo simulation failed: {e}")
|
|
|
|
def _resample_returns(
|
|
self,
|
|
equity_curve: pd.Series,
|
|
initial_value: float,
|
|
) -> np.ndarray:
|
|
"""
|
|
Simulate by resampling historical returns.
|
|
|
|
Args:
|
|
equity_curve: Historical equity curve
|
|
initial_value: Initial portfolio value
|
|
|
|
Returns:
|
|
Array of final values from simulations
|
|
"""
|
|
# Calculate returns
|
|
returns = equity_curve.pct_change().dropna().values
|
|
|
|
if len(returns) == 0:
|
|
raise MonteCarloError("No returns available for resampling")
|
|
|
|
n_periods = len(returns)
|
|
final_values = np.zeros(self.config.n_simulations)
|
|
|
|
for i in tqdm(range(self.config.n_simulations), desc="Monte Carlo simulation"):
|
|
# Resample returns with replacement
|
|
if self.config.preserve_order:
|
|
# Block resampling to preserve some order
|
|
block_size = min(20, n_periods // 10)
|
|
resampled_returns = self._block_resample(returns, n_periods, block_size)
|
|
else:
|
|
# Random resampling
|
|
resampled_returns = np.random.choice(returns, size=n_periods, replace=True)
|
|
|
|
# Calculate final value
|
|
final_value = initial_value * np.prod(1 + resampled_returns)
|
|
final_values[i] = final_value
|
|
|
|
return final_values
|
|
|
|
def _resample_trades(
|
|
self,
|
|
trades: pd.DataFrame,
|
|
initial_value: float,
|
|
) -> np.ndarray:
|
|
"""
|
|
Simulate by resampling trades.
|
|
|
|
Args:
|
|
trades: DataFrame with trade information
|
|
initial_value: Initial portfolio value
|
|
|
|
Returns:
|
|
Array of final values from simulations
|
|
"""
|
|
if 'pnl' not in trades.columns:
|
|
raise MonteCarloError("Trades must have 'pnl' column")
|
|
|
|
trade_returns = (trades['pnl'] / initial_value).values
|
|
n_trades = len(trade_returns)
|
|
|
|
if n_trades == 0:
|
|
raise MonteCarloError("No trades available for resampling")
|
|
|
|
final_values = np.zeros(self.config.n_simulations)
|
|
|
|
for i in tqdm(range(self.config.n_simulations), desc="Monte Carlo simulation"):
|
|
# Resample trades
|
|
if self.config.preserve_order:
|
|
# Sequential resampling with some randomness
|
|
resampled_returns = self._sequential_resample(trade_returns)
|
|
else:
|
|
# Random resampling
|
|
resampled_returns = np.random.choice(trade_returns, size=n_trades, replace=True)
|
|
|
|
# Calculate final value
|
|
cumulative_return = np.sum(resampled_returns)
|
|
final_value = initial_value * (1 + cumulative_return)
|
|
final_values[i] = final_value
|
|
|
|
return final_values
|
|
|
|
def _parametric_simulation(
|
|
self,
|
|
equity_curve: pd.Series,
|
|
initial_value: float,
|
|
) -> np.ndarray:
|
|
"""
|
|
Simulate using parametric distribution.
|
|
|
|
Assumes returns follow a normal distribution with estimated parameters.
|
|
|
|
Args:
|
|
equity_curve: Historical equity curve
|
|
initial_value: Initial portfolio value
|
|
|
|
Returns:
|
|
Array of final values from simulations
|
|
"""
|
|
# Calculate returns
|
|
returns = equity_curve.pct_change().dropna().values
|
|
|
|
if len(returns) == 0:
|
|
raise MonteCarloError("No returns available for parametric simulation")
|
|
|
|
# Estimate parameters
|
|
mean_return = np.mean(returns)
|
|
std_return = np.std(returns)
|
|
n_periods = len(returns)
|
|
|
|
final_values = np.zeros(self.config.n_simulations)
|
|
|
|
for i in tqdm(range(self.config.n_simulations), desc="Monte Carlo simulation"):
|
|
# Generate random returns from normal distribution
|
|
simulated_returns = np.random.normal(mean_return, std_return, n_periods)
|
|
|
|
# Calculate final value
|
|
final_value = initial_value * np.prod(1 + simulated_returns)
|
|
final_values[i] = final_value
|
|
|
|
return final_values
|
|
|
|
def _block_resample(
|
|
self,
|
|
data: np.ndarray,
|
|
target_length: int,
|
|
block_size: int,
|
|
) -> np.ndarray:
|
|
"""
|
|
Resample data in blocks to preserve some temporal structure.
|
|
|
|
Args:
|
|
data: Data to resample
|
|
target_length: Target length of resampled data
|
|
block_size: Size of blocks to resample
|
|
|
|
Returns:
|
|
Resampled data
|
|
"""
|
|
n_data = len(data)
|
|
n_blocks = (target_length + block_size - 1) // block_size
|
|
|
|
resampled = []
|
|
for _ in range(n_blocks):
|
|
# Random starting point
|
|
start_idx = np.random.randint(0, max(1, n_data - block_size + 1))
|
|
end_idx = min(start_idx + block_size, n_data)
|
|
block = data[start_idx:end_idx]
|
|
resampled.extend(block)
|
|
|
|
return np.array(resampled[:target_length])
|
|
|
|
def _sequential_resample(self, data: np.ndarray) -> np.ndarray:
|
|
"""
|
|
Resample while maintaining some sequential structure.
|
|
|
|
Args:
|
|
data: Data to resample
|
|
|
|
Returns:
|
|
Resampled data
|
|
"""
|
|
n_data = len(data)
|
|
resampled = np.zeros(n_data)
|
|
|
|
# Start with a random position
|
|
current_idx = np.random.randint(0, n_data)
|
|
|
|
for i in range(n_data):
|
|
resampled[i] = data[current_idx]
|
|
|
|
# Move to next position with some randomness
|
|
if np.random.random() < 0.8: # 80% chance to move sequentially
|
|
current_idx = (current_idx + 1) % n_data
|
|
else: # 20% chance to jump randomly
|
|
current_idx = np.random.randint(0, n_data)
|
|
|
|
return resampled
|
|
|
|
def _calculate_statistics(
|
|
self,
|
|
simulated_values: np.ndarray,
|
|
initial_value: float,
|
|
) -> MonteCarloResults:
|
|
"""
|
|
Calculate statistics from simulated values.
|
|
|
|
Args:
|
|
simulated_values: Array of final values
|
|
initial_value: Initial portfolio value
|
|
|
|
Returns:
|
|
MonteCarloResults
|
|
"""
|
|
# Basic statistics
|
|
mean_final = np.mean(simulated_values)
|
|
median_final = np.median(simulated_values)
|
|
std_final = np.std(simulated_values)
|
|
min_final = np.min(simulated_values)
|
|
max_final = np.max(simulated_values)
|
|
|
|
# Probability of profit
|
|
prob_profit = np.sum(simulated_values > initial_value) / len(simulated_values)
|
|
|
|
# Confidence intervals
|
|
confidence_intervals = {}
|
|
for level in self.config.confidence_levels:
|
|
alpha = 1 - level
|
|
lower_percentile = (alpha / 2) * 100
|
|
upper_percentile = (1 - alpha / 2) * 100
|
|
|
|
lower_bound = np.percentile(simulated_values, lower_percentile)
|
|
upper_bound = np.percentile(simulated_values, upper_percentile)
|
|
|
|
confidence_intervals[level] = (float(lower_bound), float(upper_bound))
|
|
|
|
# Percentiles
|
|
percentiles = {
|
|
p: float(np.percentile(simulated_values, p))
|
|
for p in [1, 5, 10, 25, 50, 75, 90, 95, 99]
|
|
}
|
|
|
|
# Store sample of simulated paths (for visualization)
|
|
# Note: This would require storing the full paths, not just final values
|
|
# For now, we'll skip this to save memory
|
|
|
|
results = MonteCarloResults(
|
|
n_simulations=self.config.n_simulations,
|
|
mean_final_value=float(mean_final),
|
|
median_final_value=float(median_final),
|
|
std_final_value=float(std_final),
|
|
confidence_intervals=confidence_intervals,
|
|
worst_case=float(min_final),
|
|
best_case=float(max_final),
|
|
probability_of_profit=float(prob_profit),
|
|
percentiles=percentiles,
|
|
)
|
|
|
|
return results
|
|
|
|
def simulate_paths(
|
|
self,
|
|
equity_curve: pd.Series,
|
|
n_paths: int = 100,
|
|
) -> pd.DataFrame:
|
|
"""
|
|
Simulate multiple equity curve paths.
|
|
|
|
Args:
|
|
equity_curve: Historical equity curve
|
|
n_paths: Number of paths to simulate
|
|
|
|
Returns:
|
|
DataFrame with simulated paths
|
|
"""
|
|
returns = equity_curve.pct_change().dropna()
|
|
n_periods = len(returns)
|
|
initial_value = equity_curve.iloc[0]
|
|
|
|
paths = np.zeros((n_periods, n_paths))
|
|
|
|
for i in range(n_paths):
|
|
# Resample returns
|
|
resampled_returns = np.random.choice(returns.values, size=n_periods, replace=True)
|
|
|
|
# Calculate path
|
|
path_values = initial_value * np.cumprod(1 + resampled_returns)
|
|
paths[:, i] = path_values
|
|
|
|
# Create DataFrame
|
|
paths_df = pd.DataFrame(
|
|
paths,
|
|
index=returns.index,
|
|
columns=[f'path_{i}' for i in range(n_paths)]
|
|
)
|
|
|
|
return paths_df
|
|
|
|
def value_at_risk(
|
|
self,
|
|
simulated_values: np.ndarray,
|
|
confidence_level: float = 0.95,
|
|
) -> float:
|
|
"""
|
|
Calculate Value at Risk (VaR).
|
|
|
|
Args:
|
|
simulated_values: Array of simulated final values
|
|
confidence_level: Confidence level (e.g., 0.95 for 95%)
|
|
|
|
Returns:
|
|
Value at Risk
|
|
"""
|
|
alpha = 1 - confidence_level
|
|
var = np.percentile(simulated_values, alpha * 100)
|
|
return float(var)
|
|
|
|
def conditional_value_at_risk(
|
|
self,
|
|
simulated_values: np.ndarray,
|
|
confidence_level: float = 0.95,
|
|
) -> float:
|
|
"""
|
|
Calculate Conditional Value at Risk (CVaR / Expected Shortfall).
|
|
|
|
Args:
|
|
simulated_values: Array of simulated final values
|
|
confidence_level: Confidence level (e.g., 0.95 for 95%)
|
|
|
|
Returns:
|
|
Conditional Value at Risk
|
|
"""
|
|
var = self.value_at_risk(simulated_values, confidence_level)
|
|
cvar = np.mean(simulated_values[simulated_values <= var])
|
|
return float(cvar)
|
|
|
|
|
|
def create_monte_carlo_config(
|
|
n_simulations: int = 10000,
|
|
method: str = "resample_returns",
|
|
confidence_levels: Optional[List[float]] = None,
|
|
random_seed: Optional[int] = None,
|
|
) -> MonteCarloConfig:
|
|
"""
|
|
Create a Monte Carlo configuration with sensible defaults.
|
|
|
|
Args:
|
|
n_simulations: Number of simulations
|
|
method: Simulation method
|
|
confidence_levels: Confidence levels for intervals
|
|
random_seed: Random seed for reproducibility
|
|
|
|
Returns:
|
|
MonteCarloConfig
|
|
"""
|
|
if confidence_levels is None:
|
|
confidence_levels = [0.90, 0.95, 0.99]
|
|
|
|
return MonteCarloConfig(
|
|
n_simulations=n_simulations,
|
|
method=method,
|
|
confidence_levels=confidence_levels,
|
|
random_seed=random_seed,
|
|
)
|