# TradingAgents/tradingagents/backtest/monte_carlo.py
#
# 497 lines
# 15 KiB
# Python

"""
Monte Carlo simulation for backtesting.
This module implements Monte Carlo methods to assess the distribution of
potential outcomes and confidence intervals for backtest results.
"""
import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any, Tuple
from decimal import Decimal
import pandas as pd
import numpy as np
from tqdm import tqdm
from .config import MonteCarloConfig
from .exceptions import MonteCarloError
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class MonteCarloResults:
    """
    Summary statistics produced by a Monte Carlo simulation run.

    Attributes:
        n_simulations: Number of simulations run
        mean_final_value: Mean of the simulated final portfolio values
        median_final_value: Median of the simulated final portfolio values
        std_final_value: Standard deviation of the simulated final values
        confidence_intervals: Mapping of confidence level to a
            ``(lower, upper)`` bound on the final value
        worst_case: Smallest simulated final value
        best_case: Largest simulated final value
        probability_of_profit: Probability of positive return
        simulated_paths: Optional sample of simulated equity curves
        percentiles: Mapping of percentile to final value
    """

    n_simulations: int
    mean_final_value: float
    median_final_value: float
    std_final_value: float
    confidence_intervals: Dict[float, Tuple[float, float]]
    worst_case: float
    best_case: float
    probability_of_profit: float
    simulated_paths: Optional[pd.DataFrame] = None
    percentiles: Dict[int, float] = field(default_factory=dict)

    def __str__(self) -> str:
        """Render the results as a multi-line, human-readable report."""
        heavy_rule = "=" * 60
        light_rule = "-" * 60

        summary = [
            "Monte Carlo Simulation Results",
            heavy_rule,
            f"Simulations: {self.n_simulations:,}",
            f"Mean Final Value: ${self.mean_final_value:,.2f}",
            f"Median Final Value: ${self.median_final_value:,.2f}",
            f"Std Dev: ${self.std_final_value:,.2f}",
            f"Probability of Profit: {self.probability_of_profit:.2%}",
            "",
            "Confidence Intervals:",
            light_rule,
        ]

        # One line per confidence level, listed in ascending order.
        intervals = [
            f"{level:.0%}: ${bounds[0]:,.2f} - ${bounds[1]:,.2f}"
            for level, bounds in sorted(self.confidence_intervals.items())
        ]

        extremes = [
            "",
            "Extreme Cases:",
            light_rule,
            f"Best Case: ${self.best_case:,.2f}",
            f"Worst Case: ${self.worst_case:,.2f}",
        ]

        return "\n".join(summary + intervals + extremes)
class MonteCarloSimulator:
    """
    Performs Monte Carlo simulations on backtest results.

    Supports three methods, selected by ``config.method``:
    ``'resample_returns'`` (bootstrap of period returns),
    ``'resample_trades'`` (bootstrap of per-trade P&L) and
    ``'parametric'`` (normally distributed returns).

    All random draws go through NumPy's global ``np.random`` state, which
    ``__init__`` seeds when ``config.random_seed`` is set.
    """

    def __init__(self, config: "MonteCarloConfig"):
        """
        Initialize Monte Carlo simulator.

        Args:
            config: Monte Carlo configuration. This class reads
                ``n_simulations``, ``method``, ``preserve_order``,
                ``confidence_levels`` and ``random_seed`` from it.
        """
        self.config = config
        # Seeding is global: it affects every other user of np.random in
        # the process, not only this simulator.
        if config.random_seed is not None:
            np.random.seed(config.random_seed)
        logger.info(f"MonteCarloSimulator initialized with {config.n_simulations} simulations")

    def simulate(
        self,
        equity_curve: pd.Series,
        trades: Optional[pd.DataFrame] = None,
        initial_value: Optional[float] = None,
    ) -> "MonteCarloResults":
        """
        Run Monte Carlo simulation.

        Args:
            equity_curve: Historical equity curve
            trades: DataFrame with trade information (required for trade resampling)
            initial_value: Initial portfolio value (default: first value in equity_curve)

        Returns:
            MonteCarloResults

        Raises:
            MonteCarloError: If simulation fails for any reason. The
                underlying exception is attached as ``__cause__``.
        """
        logger.info(f"Running Monte Carlo simulation: {self.config.method}")
        try:
            if initial_value is None:
                # Checked inside the try so an empty curve surfaces as the
                # documented MonteCarloError rather than a bare IndexError.
                if equity_curve is None or len(equity_curve) == 0:
                    raise MonteCarloError(
                        "Equity curve is empty; cannot infer initial value"
                    )
                initial_value = float(equity_curve.iloc[0])

            method = self.config.method
            if method == 'resample_returns':
                simulated_values = self._resample_returns(equity_curve, initial_value)
            elif method == 'resample_trades':
                if trades is None or trades.empty:
                    raise MonteCarloError("Trades data required for trade resampling")
                simulated_values = self._resample_trades(trades, initial_value)
            elif method == 'parametric':
                simulated_values = self._parametric_simulation(equity_curve, initial_value)
            else:
                raise MonteCarloError(f"Unknown simulation method: {method}")

            # Calculate statistics
            results = self._calculate_statistics(simulated_values, initial_value)
            logger.info("Monte Carlo simulation complete")
            return results
        except Exception as e:
            # Every failure is reported under one exception type; chaining
            # keeps the original traceback available to callers.
            raise MonteCarloError(f"Monte Carlo simulation failed: {e}") from e

    def _resample_returns(
        self,
        equity_curve: pd.Series,
        initial_value: float,
    ) -> np.ndarray:
        """
        Simulate by resampling historical returns.

        Args:
            equity_curve: Historical equity curve
            initial_value: Initial portfolio value

        Returns:
            Array of final values from simulations

        Raises:
            MonteCarloError: If the equity curve yields no returns
        """
        # Calculate returns
        returns = equity_curve.pct_change().dropna().values
        if len(returns) == 0:
            raise MonteCarloError("No returns available for resampling")

        n_periods = len(returns)
        n_simulations = self.config.n_simulations
        preserve_order = self.config.preserve_order
        # Roughly a tenth of the history, capped at 20 periods. The floor of
        # 1 matters: with fewer than 10 returns the integer division gives
        # 0, which would make block resampling divide by zero.
        block_size = max(1, min(20, n_periods // 10))

        final_values = np.zeros(n_simulations)
        for i in tqdm(range(n_simulations), desc="Monte Carlo simulation"):
            if preserve_order:
                # Block resampling to preserve some order
                resampled_returns = self._block_resample(returns, n_periods, block_size)
            else:
                # Independent draws with replacement
                resampled_returns = np.random.choice(returns, size=n_periods, replace=True)
            # Compound the resampled returns into a final value
            final_values[i] = initial_value * np.prod(1 + resampled_returns)
        return final_values

    def _resample_trades(
        self,
        trades: pd.DataFrame,
        initial_value: float,
    ) -> np.ndarray:
        """
        Simulate by resampling trades.

        Each trade's ``pnl`` is expressed as a fraction of ``initial_value``
        and the resampled fractions are summed (not compounded).

        Args:
            trades: DataFrame with trade information; must have a 'pnl' column
            initial_value: Initial portfolio value; must be non-zero

        Returns:
            Array of final values from simulations

        Raises:
            MonteCarloError: If 'pnl' is missing, there are no trades, or
                ``initial_value`` is zero
        """
        if 'pnl' not in trades.columns:
            raise MonteCarloError("Trades must have 'pnl' column")
        if initial_value == 0:
            # Dividing P&L by zero would silently yield inf/NaN results.
            raise MonteCarloError("Initial value must be non-zero for trade resampling")

        trade_returns = (trades['pnl'] / initial_value).values
        n_trades = len(trade_returns)
        if n_trades == 0:
            raise MonteCarloError("No trades available for resampling")

        n_simulations = self.config.n_simulations
        preserve_order = self.config.preserve_order

        final_values = np.zeros(n_simulations)
        for i in tqdm(range(n_simulations), desc="Monte Carlo simulation"):
            if preserve_order:
                # Sequential resampling with some randomness
                resampled_returns = self._sequential_resample(trade_returns)
            else:
                # Independent draws with replacement
                resampled_returns = np.random.choice(trade_returns, size=n_trades, replace=True)
            cumulative_return = np.sum(resampled_returns)
            final_values[i] = initial_value * (1 + cumulative_return)
        return final_values

    def _parametric_simulation(
        self,
        equity_curve: pd.Series,
        initial_value: float,
    ) -> np.ndarray:
        """
        Simulate using parametric distribution.

        Assumes returns follow a normal distribution whose mean and standard
        deviation are estimated from the historical returns (``np.std`` with
        its default ``ddof=0``).

        Args:
            equity_curve: Historical equity curve
            initial_value: Initial portfolio value

        Returns:
            Array of final values from simulations

        Raises:
            MonteCarloError: If the equity curve yields no returns
        """
        # Calculate returns
        returns = equity_curve.pct_change().dropna().values
        if len(returns) == 0:
            raise MonteCarloError("No returns available for parametric simulation")

        # Estimate parameters
        mean_return = np.mean(returns)
        std_return = np.std(returns)
        n_periods = len(returns)
        n_simulations = self.config.n_simulations

        final_values = np.zeros(n_simulations)
        for i in tqdm(range(n_simulations), desc="Monte Carlo simulation"):
            # Generate random returns from normal distribution
            simulated_returns = np.random.normal(mean_return, std_return, n_periods)
            final_values[i] = initial_value * np.prod(1 + simulated_returns)
        return final_values

    def _block_resample(
        self,
        data: np.ndarray,
        target_length: int,
        block_size: int,
    ) -> np.ndarray:
        """
        Resample data in blocks to preserve some temporal structure.

        Contiguous blocks are drawn at random start positions and
        concatenated until ``target_length`` items are available.

        Args:
            data: Data to resample
            target_length: Target length of resampled data
            block_size: Size of blocks to resample. Values below 1 or above
                ``len(data)`` are clamped into ``[1, len(data)]``.

        Returns:
            Resampled data of length ``target_length`` (empty if ``data`` is
            empty or ``target_length`` is not positive)
        """
        n_data = len(data)
        if n_data == 0 or target_length <= 0:
            return np.array([])

        # Clamp: a size of 0 would divide by zero below, and a size larger
        # than the data would produce short blocks and an undersized result.
        block_size = max(1, min(int(block_size), n_data))
        n_blocks = (target_length + block_size - 1) // block_size  # ceil division
        max_start = n_data - block_size + 1  # exclusive bound, always >= 1

        blocks = []
        for _ in range(n_blocks):
            # Random starting point
            start_idx = np.random.randint(0, max_start)
            blocks.append(data[start_idx:start_idx + block_size])
        # The last block may overshoot; trim to the requested length.
        return np.concatenate(blocks)[:target_length]

    def _sequential_resample(self, data: np.ndarray) -> np.ndarray:
        """
        Resample while maintaining some sequential structure.

        Walks through ``data`` from a random start: after each pick it moves
        to the next element (wrapping around) with probability 0.8 and jumps
        to a random position otherwise.

        Args:
            data: Data to resample

        Returns:
            Resampled data with the same length as ``data``
        """
        n_data = len(data)
        resampled = np.zeros(n_data)
        if n_data == 0:
            # Nothing to draw from; randint(0, 0) would raise.
            return resampled

        # Start with a random position
        current_idx = np.random.randint(0, n_data)
        for i in range(n_data):
            resampled[i] = data[current_idx]
            # Move to next position with some randomness
            if np.random.random() < 0.8:  # 80% chance to move sequentially
                current_idx = (current_idx + 1) % n_data
            else:  # 20% chance to jump randomly
                current_idx = np.random.randint(0, n_data)
        return resampled

    def _calculate_statistics(
        self,
        simulated_values: np.ndarray,
        initial_value: float,
    ) -> "MonteCarloResults":
        """
        Calculate statistics from simulated values.

        Args:
            simulated_values: Array of final values
            initial_value: Initial portfolio value; a simulation counts as
                profitable when its final value is strictly greater

        Returns:
            MonteCarloResults (``simulated_paths`` is left unset)

        Raises:
            MonteCarloError: If ``simulated_values`` is empty
        """
        simulated_values = np.asarray(simulated_values)
        if simulated_values.size == 0:
            raise MonteCarloError("No simulated values to summarize")

        # Basic statistics
        mean_final = np.mean(simulated_values)
        median_final = np.median(simulated_values)
        std_final = np.std(simulated_values)
        min_final = np.min(simulated_values)
        max_final = np.max(simulated_values)

        # Probability of profit
        prob_profit = np.sum(simulated_values > initial_value) / len(simulated_values)

        # Two-sided confidence intervals: a level of 0.95 spans the 2.5th
        # to the 97.5th percentile.
        confidence_intervals = {}
        for level in self.config.confidence_levels:
            alpha = 1 - level
            lower_bound = np.percentile(simulated_values, (alpha / 2) * 100)
            upper_bound = np.percentile(simulated_values, (1 - alpha / 2) * 100)
            confidence_intervals[level] = (float(lower_bound), float(upper_bound))

        # Percentiles
        percentiles = {
            p: float(np.percentile(simulated_values, p))
            for p in [1, 5, 10, 25, 50, 75, 90, 95, 99]
        }

        # Simulated paths are not stored here: only final values are
        # available at this point, not the full paths.
        return MonteCarloResults(
            n_simulations=self.config.n_simulations,
            mean_final_value=float(mean_final),
            median_final_value=float(median_final),
            std_final_value=float(std_final),
            confidence_intervals=confidence_intervals,
            worst_case=float(min_final),
            best_case=float(max_final),
            probability_of_profit=float(prob_profit),
            percentiles=percentiles,
        )

    def simulate_paths(
        self,
        equity_curve: pd.Series,
        n_paths: int = 100,
    ) -> pd.DataFrame:
        """
        Simulate multiple equity curve paths.

        Each path compounds historical returns resampled with replacement,
        starting from the first value of ``equity_curve``.

        Args:
            equity_curve: Historical equity curve
            n_paths: Number of paths to simulate

        Returns:
            DataFrame with one column per path (``path_0``, ``path_1``, ...)
            indexed like the return series, i.e. without the first
            observation of ``equity_curve``
        """
        returns = equity_curve.pct_change().dropna()
        return_values = returns.values
        n_periods = len(returns)
        initial_value = equity_curve.iloc[0]

        paths = np.zeros((n_periods, n_paths))
        for i in range(n_paths):
            # Resample returns
            resampled_returns = np.random.choice(return_values, size=n_periods, replace=True)
            # Calculate path
            paths[:, i] = initial_value * np.cumprod(1 + resampled_returns)

        return pd.DataFrame(
            paths,
            index=returns.index,
            columns=[f'path_{i}' for i in range(n_paths)],
        )

    def value_at_risk(
        self,
        simulated_values: np.ndarray,
        confidence_level: float = 0.95,
    ) -> float:
        """
        Calculate Value at Risk (VaR).

        The result is the ``(1 - confidence_level)`` percentile of the
        simulated final values, so it is a portfolio value, not a loss.

        Args:
            simulated_values: Array of simulated final values
            confidence_level: Confidence level (e.g., 0.95 for 95%)

        Returns:
            Value at Risk
        """
        alpha = 1 - confidence_level
        var = np.percentile(np.asarray(simulated_values), alpha * 100)
        return float(var)

    def conditional_value_at_risk(
        self,
        simulated_values: np.ndarray,
        confidence_level: float = 0.95,
    ) -> float:
        """
        Calculate Conditional Value at Risk (CVaR / Expected Shortfall).

        This is the mean of the simulated final values that are at or below
        the VaR threshold.

        Args:
            simulated_values: Array of simulated final values
            confidence_level: Confidence level (e.g., 0.95 for 95%)

        Returns:
            Conditional Value at Risk
        """
        # Coerce so that boolean masking also works for plain sequences.
        values = np.asarray(simulated_values)
        var = self.value_at_risk(values, confidence_level)
        cvar = np.mean(values[values <= var])
        return float(cvar)
def create_monte_carlo_config(
    n_simulations: int = 10000,
    method: str = "resample_returns",
    confidence_levels: Optional[List[float]] = None,
    random_seed: Optional[int] = None,
) -> MonteCarloConfig:
    """
    Build a MonteCarloConfig, filling in default confidence levels.

    Args:
        n_simulations: Number of simulations
        method: Simulation method
        confidence_levels: Confidence levels for intervals; when None,
            90%, 95% and 99% are used
        random_seed: Random seed for reproducibility

    Returns:
        MonteCarloConfig
    """
    # A fresh list is built per call, so callers never share a default.
    levels = [0.90, 0.95, 0.99] if confidence_levels is None else confidence_levels
    settings = {
        "n_simulations": n_simulations,
        "method": method,
        "confidence_levels": levels,
        "random_seed": random_seed,
    }
    return MonteCarloConfig(**settings)