change to longShort

This commit is contained in:
Zichen Liu 2025-11-07 16:59:55 -06:00
parent e99138f5b9
commit 7df8ae203f
6 changed files with 1371 additions and 0 deletions

View File

@ -0,0 +1,66 @@
from .baseline_strategies import (
BuyAndHoldStrategy,
MACDStrategy,
KDJRSIStrategy,
ZMRStrategy,
SMAStrategy,
get_all_baseline_strategies
)
from .metrics import (
calculate_cumulative_return,
calculate_annualized_return,
calculate_sharpe_ratio,
calculate_maximum_drawdown,
calculate_all_metrics,
create_comparison_table
)
from .backtest import (
BacktestEngine,
TradingAgentsBacktester,
load_stock_data
)
from .visualize import (
plot_cumulative_returns,
plot_transaction_history,
plot_metrics_comparison,
plot_drawdown,
create_summary_report
)
from .run_evaluation import run_evaluation
# Names re-exported by `from <package> import *`, grouped by the submodule
# each one is imported from above.
__all__ = [
    # Strategies
    'BuyAndHoldStrategy',
    'MACDStrategy',
    'KDJRSIStrategy',
    'ZMRStrategy',
    'SMAStrategy',
    'get_all_baseline_strategies',
    # Metrics
    'calculate_cumulative_return',
    'calculate_annualized_return',
    'calculate_sharpe_ratio',
    'calculate_maximum_drawdown',
    'calculate_all_metrics',
    'create_comparison_table',
    # Backtesting
    'BacktestEngine',
    'TradingAgentsBacktester',
    'load_stock_data',
    # Visualization
    'plot_cumulative_returns',
    'plot_transaction_history',
    'plot_metrics_comparison',
    'plot_drawdown',
    'create_summary_report',
    # Main evaluation
    'run_evaluation',
]

View File

@ -0,0 +1,251 @@
"""
Backtesting engine for TradingAgents and baseline strategies.
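Both TradingAgents and rule-based strategies share the same return calculation pipeline:
1. Generate signals/actions: 1 (BUY), 0 (HOLD), -1 (SELL)
2. Convert actions to positions. TradingAgentsBacktester uses 1 (long) / 0 (flat);
   BaseStrategy in baseline_strategies.py also maps SELL to -1 (short).
3. Calculate returns: strategy_return = position.shift(1) * market_return
Steps 1 and 3 are identical everywhere; step 2 currently differs as noted above, so the
comparison is only apples-to-apples while no baseline takes a short position.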
"""
import pandas as pd
import numpy as np
from typing import Dict, List
from pathlib import Path
import json
# Standard OHLCV column names; standardize_single_ticker() intersects these with
# each MultiIndex column level to find out which level holds the field names.
STD_FIELDS = {"Open", "High", "Low", "Close", "Adj Close", "Volume"}
class TradingAgentsBacktester:
    """Backtest engine for the TradingAgents framework.

    One decision is requested from the wrapped graph per trading day; the
    decisions are turned into a position series and returns are computed as
    ``strategy_return = position.shift(1) * market_return``.

    Args:
        trading_agents_graph: Object exposing ``propagate(ticker, date_str)``
            that returns a ``(final_state, decision)`` pair.
        initial_capital: Starting portfolio value.
        output_dir: Root directory for the decision log. When falsy the log
            goes under ``eval_results/``.
        allow_short: ``False`` (default, the behaviour this class always had)
            makes SELL exit to cash, so positions are in {0, 1}. ``True``
            makes SELL open a short, so positions are in {-1, 0, 1}.
            NOTE(review): ``BaseStrategy._actions_to_position`` in
            baseline_strategies.py maps SELL to -1, so the default here is
            not like-for-like with the baselines - confirm which convention
            the evaluation is meant to use.
    """

    def __init__(self, trading_agents_graph, initial_capital=100000, output_dir=None, allow_short=False):
        self.graph = trading_agents_graph
        self.initial_capital = float(initial_capital)
        self.name = "TradingAgents"
        self.output_dir = output_dir
        self.allow_short = bool(allow_short)

    def backtest(self, ticker: str, start_date: str, end_date: str, data: pd.DataFrame) -> pd.DataFrame:
        """Run the graph over ``data[start_date:end_date]`` and build a portfolio frame.

        Process:
            1. Collect one action per date (1=BUY, 0=HOLD, -1=SELL); a failing
               ``propagate`` call is logged and treated as HOLD.
            2. Convert actions to positions with ``_actions_to_position``.
            3. Compute returns as ``position.shift(1) * market_return``.

        Returns:
            DataFrame indexed like the sliced data with the columns ``action``,
            ``position``, ``close``, optionally ``Volume``, ``market_return``,
            ``strategy_return``, ``cumulative_return``, ``portfolio_value``
            and ``trade_delta``. The per-day decisions are also written to a
            JSON log as a side effect.
        """
        # Restrict to window
        df = data.loc[start_date:end_date].copy()
        decisions: List[Dict] = []
        signals = pd.Series(0, index=df.index, dtype=float)
        print(f"\nRunning TradingAgents backtest on {ticker} from {start_date} to {end_date}")
        print(f"Total trading days: {len(df)}")
        print("-" * 80)

        # Step 1: collect all signals/decisions
        for i, (date, row) in enumerate(df.iterrows()):
            date_str = date.strftime("%Y-%m-%d")
            price = float(row["Close"])
            try:
                print(f"\n[{i+1}/{len(df)}] {date_str} ... ", end="")
                final_state, decision = self.graph.propagate(ticker, date_str)
                print(f"Decision: {decision}")
                signal = self._parse_decision(decision)
                decisions.append({"date": date_str, "decision": decision, "signal": signal, "price": price})
            except Exception as e:
                # Best effort: one failed day must not abort the whole run.
                print(f"Error: {e}")
                signal = 0
                decisions.append({"date": date_str, "decision": "ERROR", "signal": 0, "price": price, "error": str(e)})
            signals.loc[date] = signal

        # Step 2: convert actions to positions
        position = self._actions_to_position(signals, allow_short=self.allow_short)

        # Step 3: returns; yesterday's position determines today's exposure
        close = pd.to_numeric(df["Close"], errors="coerce")
        market_ret = close.pct_change().fillna(0.0)
        exposure = position.shift(1).fillna(0.0)
        strat_ret = (exposure * market_ret).astype(float)
        cumret = (1.0 + strat_ret).cumprod()
        portval = self.initial_capital * cumret

        # Same column layout as the baseline strategies' portfolio frames
        portfolio = pd.DataFrame(index=df.index)
        portfolio["action"] = signals  # 1=BUY, 0=HOLD, -1=SELL
        portfolio["position"] = position  # 1=long, 0=flat, -1=short (only with allow_short)
        portfolio["close"] = close
        if "Volume" in df.columns:
            vol = df["Volume"]
            if isinstance(vol, pd.DataFrame) and vol.shape[1] == 1:
                vol = vol.iloc[:, 0]
            if isinstance(vol, pd.Series):
                portfolio["Volume"] = vol
        portfolio["market_return"] = market_ret
        portfolio["strategy_return"] = strat_ret
        portfolio["cumulative_return"] = cumret
        portfolio["portfolio_value"] = portval
        portfolio["trade_delta"] = portfolio["position"].diff().fillna(0.0)  # day-over-day position change
        self._save_decisions_log(ticker, decisions, start_date, end_date)
        return portfolio

    @staticmethod
    def _actions_to_position(actions: pd.Series, allow_short: bool = False) -> pd.Series:
        """Convert an action series into a position series.

        A non-zero action sets the state (BUY -> 1, SELL -> 0, or -1 when
        ``allow_short``); HOLD (0 or NaN) keeps the previous state. The state
        before the first non-zero action is flat (0).
        """
        direction = np.sign(actions.astype(float).fillna(0.0).clip(-1, 1))
        # Blank out HOLDs so ffill carries the last real action forward.
        state = direction.where(direction != 0).ffill().fillna(0.0)
        if not allow_short:
            state = state.clip(lower=0.0)  # SELL only exits to cash
        return state.rename("position")

    def _parse_decision(self, decision: str) -> int:
        """Map a decision to an action by case-insensitive substring search.

        - contains 'BUY' or 'LONG' -> 1
        - contains 'SELL', 'EXIT' or 'CLOSE' -> -1
        - otherwise -> 0 (HOLD)

        The BUY/LONG test runs first, so a text containing both kinds of
        keyword yields 1. Matching is on substrings, not whole words.
        """
        d = str(decision).upper()
        if "BUY" in d or "LONG" in d:
            return 1
        if "SELL" in d or "EXIT" in d or "CLOSE" in d:
            return -1
        return 0

    def _save_decisions_log(self, ticker: str, decisions: List[Dict], start_date: str, end_date: str):
        """Write the per-day decisions to ``<root>/<ticker>/TradingAgents/decisions_<start>_to_<end>.json``."""
        # Use output_dir if provided, otherwise use default
        if self.output_dir:
            out = Path(self.output_dir) / ticker / "TradingAgents"
        else:
            out = Path(f"eval_results/{ticker}/TradingAgents")
        out.mkdir(parents=True, exist_ok=True)
        fp = out / f"decisions_{start_date}_to_{end_date}.json"
        with open(fp, "w") as f:
            # default=str: the graph's decision is not guaranteed to be a str, and a
            # serialisation error here would discard a run whose LLM calls already happened.
            json.dump({
                "strategy": "TradingAgents",
                "ticker": ticker,
                "start_date": start_date,
                "end_date": end_date,
                "total_days": len(decisions),
                "decisions": decisions
            }, f, indent=2, default=str)
        print(f" ✓ Saved TradingAgents detailed decisions to: {fp}")
class BacktestEngine:
"""Engine to run and compare multiple strategies."""
def __init__(self, data: pd.DataFrame, initial_capital: float = 100000):
self.data = data
self.initial_capital = float(initial_capital)
self.results: Dict[str, pd.DataFrame] = {}
def run_strategy(self, strategy, start_date: str = None, end_date: str = None, label = None) -> pd.DataFrame:
data_filtered = self.data.loc[start_date:end_date] if (start_date and end_date) else self.data
print(f"\nRunning {strategy.name}...")
portfolio = strategy.backtest(data_filtered)
self.results[label or strategy.name] = portfolio
return portfolio
def run_all_strategies(self, strategies: Dict, start_date: str = None, end_date: str = None):
for name, strategy in strategies.items():
try:
self.run_strategy(strategy, start_date, end_date)
print(f"{name} completed")
except Exception as e:
print(f"{name} failed: {e}")
def get_results(self) -> Dict[str, pd.DataFrame]:
return self.results
def load_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """Download price history for one ticker through yfinance.

    A ticker passed as a sequence of single characters (e.g. ``('A','A','P','L')``)
    is joined back into one symbol. Any failure is printed and re-raised.

    Raises:
        ValueError: If the ticker is not a string, or the download is empty.

    NOTE(review): ``end_date`` is forwarded unchanged as yfinance's ``end``
    argument - confirm whether that bound is inclusive for the callers' needs.
    """
    try:
        import yfinance as yf

        # Normalize accidental ('A','A','P','L') / ['A','A','P','L']
        is_split_symbol = isinstance(ticker, (list, tuple)) and all(
            isinstance(ch, str) and len(ch) == 1 for ch in ticker
        )
        symbol = "".join(ticker) if is_split_symbol else ticker
        if not isinstance(symbol, str):
            raise ValueError("Pass a single ticker symbol as a string, e.g., 'AAPL'.")

        frame = yf.download(symbol, start=start_date, end=end_date, progress=False)
        if frame.empty:
            raise ValueError(f"No data found for {symbol}")
    except Exception as e:
        print(f"Error loading data: {e}")
        raise
    return frame
def standardize_single_ticker(df: pd.DataFrame, ticker: str | None = None) -> pd.DataFrame:
"""Return a single-ticker OHLCV DataFrame with simple columns.
Works with yfinance single or multi-ticker outputs.
"""
df = df.copy()
# If columns are MultiIndex (common with multi-ticker yfinance)
if isinstance(df.columns, pd.MultiIndex):
# Figure out which level is the field (Open/High/...) and which is ticker
lvl0 = set(map(str, df.columns.get_level_values(0)))
lvl1 = set(map(str, df.columns.get_level_values(1)))
if len(STD_FIELDS & lvl0) > 0:
field_level, ticker_level = 0, 1
elif len(STD_FIELDS & lvl1) > 0:
field_level, ticker_level = 1, 0
else:
raise ValueError("Cannot detect OHLCV field level in MultiIndex columns.")
available = list(pd.Index(df.columns.get_level_values(ticker_level)).unique())
# Normalize weird ticker inputs like ('A','A','P','L') -> 'AAPL'
if isinstance(ticker, (list, tuple)) and all(isinstance(c, str) and len(c) == 1 for c in ticker):
ticker = "".join(ticker)
if ticker is None:
if len(available) != 1:
raise ValueError(f"Multi-ticker DataFrame. Pick one with ticker=..., available={available}")
ticker = available[0]
if str(ticker) not in map(str, available):
raise ValueError(f"Ticker {ticker!r} not in columns. Available: {available}")
# Slice to that ticker and drop the ticker level
df = df.xs(ticker, axis=1, level=ticker_level)
# Map Adj Close -> Close if Close missing
if "Close" not in df.columns and "Adj Close" in df.columns:
df = df.rename(columns={"Adj Close": "Close"})
# Final sanity
req = ["Open", "High", "Low", "Close"]
missing = [c for c in req if c not in df.columns]
if missing:
raise ValueError(f"Data missing columns: {missing}")
# Ensure 'Close' is a Series (not 1-col DataFrame)
close = df["Close"]
if isinstance(close, pd.DataFrame) and close.shape[1] == 1:
df["Close"] = close.iloc[:, 0]
return df

View File

@ -0,0 +1,185 @@
import pandas as pd
import numpy as np
from abc import ABC, abstractmethod
class BaseStrategy(ABC):
    """Base class for action-based trading strategies (long/short).

    Subclasses emit one action per bar; ``backtest`` converts the actions to
    a position in [-1, 1] and applies the previous bar's position to each
    bar's market return.
    """

    def __init__(self, initial_capital=100000):
        self.initial_capital = float(initial_capital)
        self.name = self.__class__.__name__

    def _close_series(self, data: pd.DataFrame) -> pd.Series:
        """Return the 'Close' column as a numeric Series.

        Raises:
            ValueError: If 'Close' resolves to more than one column.
        """
        close = data["Close"]
        if isinstance(close, pd.DataFrame):
            if close.shape[1] == 1:
                close = close.iloc[:, 0]
            else:
                raise ValueError("Multiple 'Close' columns detected. Pass single-ticker data.")
        return pd.to_numeric(close, errors="coerce")

    @abstractmethod
    def generate_signals(self, data: pd.DataFrame) -> pd.Series:
        """Generate *actions* by date.

             1 = BUY  (go long, or stay long)
             0 = HOLD (keep the previous position)
            -1 = SELL (go short, or stay short)

        ``_actions_to_position`` stores the action value itself as the
        position, so -1 is a short position, not an exit to cash.
        """

    def _prep_ohlcv(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``data`` after checking Open/High/Low/Close exist."""
        req = ["Open", "High", "Low", "Close"]
        for col in req:
            if col not in data.columns:
                raise ValueError(f"Data missing column '{col}'")
        return data.copy()

    @staticmethod
    def _actions_to_position(actions: pd.Series) -> pd.Series:
        """Convert an action series to a position series in [-1, 1].

        Each non-zero action (clipped to [-1, 1]) becomes the position; a
        zero or NaN action keeps the previous position. The position before
        the first non-zero action is 0 (flat).
        """
        a = actions.astype(float).fillna(0.0).clip(-1, 1)
        # Blank out HOLDs so ffill carries the last real action forward.
        return a.where(a != 0).ffill().fillna(0.0).rename("position")

    def backtest(self, data: pd.DataFrame) -> pd.DataFrame:
        """Run the strategy over ``data`` and return the portfolio frame.

        Returns:
            DataFrame indexed like ``data`` with the columns ``action``,
            ``position``, ``close``, optionally ``Volume``, ``market_return``,
            ``strategy_return``, ``cumulative_return``, ``portfolio_value``
            and ``trade_delta``.
        """
        df = self._prep_ohlcv(data)
        # 1) get actions (1, 0, -1)
        actions = self.generate_signals(df).reindex(df.index).fillna(0).clip(-1, 1).astype(float)
        # 2) map actions -> position (1 long, 0 flat, -1 short)
        position = self._actions_to_position(actions)
        # 3) compute returns; a position set today only earns from tomorrow on
        close = self._close_series(df)
        market_ret = close.pct_change().fillna(0.0)
        exposure = position.shift(1).fillna(0.0)  # use yesterday's position
        strat_ret = (exposure * market_ret).astype(float)
        cumret = (1.0 + strat_ret).cumprod()
        portval = self.initial_capital * cumret

        portfolio = pd.DataFrame(index=df.index)
        portfolio["action"] = actions  # 1 buy / 0 hold / -1 sell
        portfolio["position"] = position  # 1 long / 0 flat / -1 short
        portfolio["close"] = close
        if "Volume" in df.columns:
            vol = df["Volume"]
            if isinstance(vol, pd.DataFrame) and vol.shape[1] == 1:
                vol = vol.iloc[:, 0]
            if isinstance(vol, pd.Series):
                portfolio["Volume"] = vol
        portfolio["market_return"] = market_ret
        portfolio["strategy_return"] = strat_ret
        portfolio["cumulative_return"] = cumret
        portfolio["portfolio_value"] = portval
        # Day-over-day position change; a long<->short flip shows up as +/-2.
        portfolio["trade_delta"] = portfolio["position"].diff().fillna(0.0)
        return portfolio
class BuyAndHoldStrategy(BaseStrategy):
    """Emit a single BUY on the first bar and HOLD on every later bar."""

    def generate_signals(self, data: pd.DataFrame) -> pd.Series:
        actions = pd.Series(0.0, index=data.index)
        if not actions.empty:
            actions.iloc[0] = 1.0  # the only trade: buy at the start
        return actions
class MACDStrategy(BaseStrategy):
    """Contrarian MACD(12, 26, 9): MACD above its signal line -> SELL, below -> BUY."""

    def generate_signals(self, data):
        """Return -1 where MACD > signal line, 1 where MACD < signal line, else 0."""
        # Same accessor as ZMRStrategy/SMAStrategy: tolerates a one-column
        # 'Close' frame and coerces the values to numeric.
        close = self._close_series(data)
        ema_fast = close.ewm(span=12, adjust=False).mean()
        ema_slow = close.ewm(span=26, adjust=False).mean()
        macd = ema_fast - ema_slow
        signal = macd.ewm(span=9, adjust=False).mean()
        diff = macd - signal
        a = pd.Series(0.0, index=data.index)
        a[diff > 0] = -1.0  # SELL
        a[diff < 0] = 1.0  # BUY
        return a
class KDJRSIStrategy(BaseStrategy):
    """Contrarian KDJ + RSI: overbought -> SELL, oversold -> BUY."""

    def generate_signals(self, data):
        """Return -1 when RSI > 75 and K > 85, 1 when RSI < 25 and K < 15, else 0."""
        # Same accessor as ZMRStrategy/SMAStrategy: tolerates a one-column
        # 'Close' frame and coerces the values to numeric.
        close = self._close_series(data)

        # === RSI (gains/losses smoothed with a span-14 EWM) ===
        delta = close.diff()
        up, down = delta.clip(lower=0), -delta.clip(upper=0)
        # A zero average loss becomes NaN so the ratio is undefined, not infinite.
        avg_loss = down.ewm(span=14, adjust=False).mean().replace(0, np.nan)
        rs = up.ewm(span=14, adjust=False).mean() / avg_loss
        rsi = 100 - 100 / (1 + rs)

        # === KDJ (only the K line is used) ===
        low = data["Low"].rolling(9).min()
        high = data["High"].rolling(9).max()
        denom = (high - low).replace(0, np.nan)
        rsv = 100 * (close - low) / denom
        k = rsv.ewm(com=2, adjust=False).mean()

        # === Actions ===
        a = pd.Series(0.0, index=data.index)
        # Tightened thresholds: RSI>75 & K>85 -> SELL; RSI<25 & K<15 -> BUY
        a[(rsi > 75) & (k > 85)] = -1.0
        a[(rsi < 25) & (k < 15)] = 1.0
        return a
class ZMRStrategy(BaseStrategy):
    """Z-score mean reversion on a 50-bar window: z > 1.3 -> SELL, z < -1.3 -> BUY."""

    def generate_signals(self, data):
        prices = self._close_series(data)
        window = prices.rolling(50)
        # A zero rolling std becomes NaN so the z-score is undefined there.
        zscore = (prices - window.mean()) / window.std().replace(0, np.nan)

        actions = pd.Series(0.0, index=data.index)
        actions.loc[zscore > 1.3] = -1.0  # overvalued -> SELL
        actions.loc[zscore < -1.3] = 1.0  # undervalued -> BUY
        return actions
class SMAStrategy(BaseStrategy):
    """SMA crossover: BUY while the short average is above the long one, SELL while below."""

    def __init__(self, initial_capital=100000, short_window=5, long_window=20):
        super().__init__(initial_capital)
        self.short_window = int(short_window)
        self.long_window = int(long_window)

    def generate_signals(self, data: pd.DataFrame) -> pd.Series:
        prices = self._close_series(data)

        def _sma(window):
            # min_periods == window: no average until the window is full
            return prices.rolling(window=window, min_periods=window).mean()

        fast = _sma(self.short_window)
        slow = _sma(self.long_window)

        actions = pd.Series(0.0, index=data.index)
        actions.loc[fast > slow] = 1.0
        actions.loc[fast < slow] = -1.0
        return actions
def get_all_baseline_strategies(initial_capital=100000):
    """Build one instance of every baseline strategy, keyed by its display name."""
    registry = (
        ("BuyAndHold", BuyAndHoldStrategy),
        ("MACD", MACDStrategy),
        ("KDJ&RSI", KDJRSIStrategy),
        ("ZMR", ZMRStrategy),
        ("SMA", SMAStrategy),
    )
    return {label: strategy_cls(initial_capital) for label, strategy_cls in registry}

View File

@ -0,0 +1,116 @@
"""
Evaluation metrics for trading strategies.
Implements: Cumulative Return, Annualized Return, Sharpe Ratio, Maximum Drawdown
"""
import pandas as pd
import numpy as np
from typing import Dict
def _require_cols(df: pd.DataFrame, cols):
    """Raise ValueError naming every entry of ``cols`` that is not a column of ``df``."""
    absent = []
    for name in cols:
        if name not in df.columns:
            absent.append(name)
    if absent:
        raise ValueError(f"Portfolio missing columns: {absent}")
def calculate_cumulative_return(portfolio: pd.DataFrame) -> float:
    """CR% = (V_end / V_start - 1) * 100.

    Returns 0.0 for an empty portfolio or a non-positive starting value.

    Raises:
        ValueError: If ``portfolio_value`` is missing.
    """
    _require_cols(portfolio, ["portfolio_value"])
    values = portfolio["portfolio_value"]
    if values.empty:
        # Same degenerate-input convention as calculate_win_rate.
        return 0.0
    v_start = float(values.iloc[0])
    v_end = float(values.iloc[-1])
    if v_start <= 0:
        return 0.0
    return (v_end / v_start - 1.0) * 100.0
def calculate_annualized_return(portfolio: pd.DataFrame, trading_days: int | None = None) -> float:
    """AR% = ((V_end / V_start) ** (1/years) - 1) * 100 with 252 trading days/year.

    Args:
        portfolio: Frame with a ``portfolio_value`` column.
        trading_days: Length of the period in trading days; defaults to the
            number of rows in ``portfolio``.

    Returns:
        The annualized return in percent, or 0.0 when the portfolio is empty,
        a boundary value is non-positive, or the period length is not positive.

    Raises:
        ValueError: If ``portfolio_value`` is missing.
    """
    _require_cols(portfolio, ["portfolio_value"])
    values = portfolio["portfolio_value"]
    if values.empty:
        # Same degenerate-input convention as calculate_win_rate.
        return 0.0
    v_start = float(values.iloc[0])
    v_end = float(values.iloc[-1])
    if v_start <= 0 or v_end <= 0:
        return 0.0
    if trading_days is None:
        trading_days = len(portfolio)
    years = trading_days / 252.0
    if years <= 0:
        return 0.0
    return ((v_end / v_start) ** (1.0 / years) - 1.0) * 100.0
def calculate_sharpe_ratio(portfolio: pd.DataFrame, risk_free_rate: float = 0.02) -> float:
    """Annualized Sharpe ratio of the daily ``strategy_return`` column.

    SR = (mean(r) * 252 - risk_free_rate) / (std(r) * sqrt(252)), using the
    sample standard deviation (paper S1.2.3). Returns 0.0 when fewer than two
    returns are available or their standard deviation is zero.
    """
    _require_cols(portfolio, ["strategy_return"])
    daily = portfolio["strategy_return"].dropna().astype(float)
    if len(daily) < 2:
        return 0.0
    daily_std = daily.std(ddof=1)
    if daily_std == 0:
        return 0.0
    annual_std = daily_std * np.sqrt(252.0)
    if annual_std == 0:
        return 0.0
    annual_mean = daily.mean() * 252.0
    return (annual_mean - risk_free_rate) / annual_std
def calculate_maximum_drawdown(portfolio: pd.DataFrame) -> float:
    """MDD% = largest peak-to-trough decline of ``portfolio_value``, as a non-negative percent.

    Returns 0.0 for an empty portfolio.

    Raises:
        ValueError: If ``portfolio_value`` is missing.
    """
    _require_cols(portfolio, ["portfolio_value"])
    values = portfolio["portfolio_value"].astype(float)
    if values.empty:
        # min() of an empty series is NaN; report "no drawdown" instead.
        return 0.0
    running_max = values.cummax()
    drawdown = (values - running_max) / running_max
    # abs() rather than "* -100.0": with no drawdown the minimum is 0.0 and the
    # multiplication yields -0.0, which prints as "-0.00%".
    return abs(float(drawdown.min())) * 100.0
def calculate_win_rate(portfolio: pd.DataFrame) -> float:
    """Percentage of non-NaN ``strategy_return`` days that are strictly positive (0.0 if none)."""
    _require_cols(portfolio, ["strategy_return"])
    daily = portfolio["strategy_return"].dropna()
    total = len(daily)
    if total == 0:
        return 0.0
    wins = (daily > 0).sum()
    return 100.0 * wins / total
def calculate_profit_factor(portfolio: pd.DataFrame) -> float:
    """Sum of positive daily returns divided by the absolute sum of negative ones.

    With no losing day the result is ``inf`` if there is any gain, else 0.0.
    """
    _require_cols(portfolio, ["strategy_return"])
    daily = portfolio["strategy_return"].dropna()
    gross_profit = daily[daily > 0].sum()
    gross_loss = -daily[daily < 0].sum()
    if gross_loss != 0:
        return float(gross_profit / gross_loss)
    if gross_profit > 0:
        return float("inf")
    return 0.0
def calculate_all_metrics(portfolio: pd.DataFrame, risk_free_rate: float = 0.02) -> Dict[str, float]:
    """Compute every metric of this module for one portfolio, keyed by display label."""
    metrics: Dict[str, float] = {}
    metrics["Cumulative Return (%)"] = calculate_cumulative_return(portfolio)
    metrics["Annualized Return (%)"] = calculate_annualized_return(portfolio)
    metrics["Sharpe Ratio"] = calculate_sharpe_ratio(portfolio, risk_free_rate)
    metrics["Maximum Drawdown (%)"] = calculate_maximum_drawdown(portfolio)
    # Extras (not in table but handy)
    metrics["Win Rate (%)"] = calculate_win_rate(portfolio)
    metrics["Profit Factor"] = calculate_profit_factor(portfolio)
    return metrics
def print_metrics(metrics: Dict[str, float], strategy_name: str = "Strategy"):
    """Print the metrics as a framed table.

    Values whose label contains "Ratio" or "Factor" are printed bare; every
    other value gets a trailing percent sign.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"{strategy_name} Performance Metrics")
    print(rule)
    for label, value in metrics.items():
        unitless = "Ratio" in label or "Factor" in label
        suffix = "" if unitless else "%"
        print(f"{label:30s}: {value:8.2f}{suffix}")
    print(f"{rule}\n")
def create_comparison_table(all_metrics: Dict[str, Dict[str, float]]) -> pd.DataFrame:
    """Build a strategies-by-metrics table rounded to 2 decimals.

    Rows are sorted by "Sharpe Ratio" in descending order when that column exists.
    """
    table = pd.DataFrame(all_metrics).T.round(2)
    if "Sharpe Ratio" not in table.columns:
        return table
    return table.sort_values("Sharpe Ratio", ascending=False)

View File

@ -0,0 +1,273 @@
"""
Main evaluation script to run backtesting and generate results.
Evaluates TradingAgents against baseline strategies for a single ticker.
"""
import argparse
import sys
from pathlib import Path
from datetime import datetime
import pandas as pd
import json
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from evaluation_long_short.baseline_strategies import get_all_baseline_strategies
from evaluation_long_short.backtest import BacktestEngine, TradingAgentsBacktester, load_stock_data, standardize_single_ticker
from evaluation_long_short.metrics import calculate_all_metrics, create_comparison_table, print_metrics
from evaluation_long_short.visualize import plot_cumulative_returns_from_results
from tradingagents.graph.trading_graph import TradingAgentsGraph
from tradingagents.default_config import DEFAULT_CONFIG
def is_debugging() -> bool:
    """Report whether a debugpy client is attached; False if debugpy is unavailable or fails."""
    connected = False
    try:
        import debugpy
        connected = debugpy.is_client_connected()
    except Exception:
        connected = False
    return connected
def save_strategy_actions_to_json(
    portfolio: pd.DataFrame,
    strategy_name: str,
    ticker: str,
    start_date: str,
    end_date: str,
    output_dir: str
) -> None:
    """
    Write one record per portfolio row to
    ``<output_dir>/<ticker>/<strategy_name>/actions_<start_date>_to_<end_date>.json``.

    Args:
        portfolio: Portfolio DataFrame with action, position, close,
            portfolio_value, strategy_return and cumulative_return columns;
            its index entries must support ``strftime``.
        strategy_name: Name of the strategy
        ticker: Stock ticker symbol
        start_date: Start date of backtest
        end_date: End date of backtest
        output_dir: Directory to save the JSON file
    """
    target_dir = Path(output_dir) / ticker / strategy_name
    target_dir.mkdir(parents=True, exist_ok=True)

    def _cell(row, column, cast, fallback):
        # NaN cells are replaced by the per-column fallback.
        raw = row[column]
        return cast(raw) if pd.notna(raw) else fallback

    # One record per day; action and position are stored as integers.
    actions = [
        {
            "date": date.strftime("%Y-%m-%d"),
            "action": _cell(row, "action", int, 0),
            "position": _cell(row, "position", int, 0),
            "close_price": _cell(row, "close", float, None),
            "portfolio_value": _cell(row, "portfolio_value", float, None),
            "strategy_return": _cell(row, "strategy_return", float, 0.0),
            "cumulative_return": _cell(row, "cumulative_return", float, 1.0),
        }
        for date, row in portfolio.iterrows()
    ]

    payload = {
        "strategy": strategy_name,
        "ticker": ticker,
        "start_date": start_date,
        "end_date": end_date,
        "total_days": len(actions),
        "actions": actions,
    }
    fp = target_dir / f"actions_{start_date}_to_{end_date}.json"
    with open(fp, "w") as f:
        json.dump(payload, f, indent=2)
    print(f" ✓ Saved {strategy_name} actions to: {fp}")
def run_evaluation(
    ticker: str,
    start_date: str,
    end_date: str,
    initial_capital: float = 100000,
    include_tradingagents: bool = True,
    output_dir: str = None,
    config: dict = None
):
    """
    Run complete evaluation: baselines + TradingAgents for a single ticker.

    Args:
        ticker: Stock ticker symbol.
        start_date: Start of the backtest window (YYYY-MM-DD).
        end_date: End of the backtest window (YYYY-MM-DD).
        initial_capital: Starting portfolio value for every strategy.
        include_tradingagents: Whether to run the TradingAgents backtest.
        output_dir: Root directory for results; defaults to a timestamped
            folder under ``eval_results/<ticker>/``.
        config: TradingAgents configuration; falls back to ``DEFAULT_CONFIG``.
            Values already present (including ``llm_params``) take precedence
            over the defaults applied here.

    Returns:
        ``(results, all_metrics)``: portfolio DataFrames and metric dicts, both
        keyed by strategy name. Baselines use the keys of
        ``get_all_baseline_strategies`` - the same names as their JSON folders.
    """
    def _banner(title: str) -> None:
        print("\n" + "="*80)
        print(title)
        print("="*80)

    print(f"\n{'='*80}")
    print(f"EVALUATION: {ticker} from {start_date} to {end_date}")
    print(f"Initial Capital: ${initial_capital:,.2f}")
    print(f"{'='*80}\n")

    # Output dir
    if output_dir is None:
        output_dir = f"eval_results/{ticker}/{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)

    # Load data
    _banner("STEP 1: Loading Stock Data")
    data = load_stock_data(ticker, start_date, end_date)
    data = standardize_single_ticker(data, ticker)

    # Backtest engine
    engine = BacktestEngine(data, initial_capital)

    # Baselines
    _banner("STEP 2: Running Baseline Strategies")
    baselines = get_all_baseline_strategies(initial_capital)
    for name, strategy in baselines.items():
        try:
            print(f"\nRunning {name}...", end=" ")
            # label=name: store the result under the registry name so metrics,
            # the final summary and the JSON folder all use the same key.
            portfolio = engine.run_strategy(strategy, start_date, end_date, label=name)
            print("✓ Complete")
            # Save actions to JSON
            save_strategy_actions_to_json(portfolio, name, ticker, start_date, end_date, output_dir)
        except Exception as e:
            print(f"✗ Failed: {e}")

    # TradingAgents
    if include_tradingagents:
        _banner("STEP 3: Running TradingAgents")
        try:
            cfg = (config or DEFAULT_CONFIG).copy()
            # Fast eval defaults (you can override from CLI)
            cfg.setdefault("deep_think_llm", "o4-mini")
            cfg.setdefault("quick_think_llm", "gpt-4o-mini")
            cfg.setdefault("max_debate_rounds", 1)
            cfg.setdefault("max_risk_discuss_rounds", 1)
            # Decoding defaults. Caller-supplied llm_params win (e.g. the CLI's
            # temperature), and a new dict is built so the nested dict shared
            # with the caller's config / DEFAULT_CONFIG is never mutated.
            cfg["llm_params"] = {
                "temperature": 0.7,
                "top_p": 1.0,
                "seed": 42,
                **(cfg.get("llm_params") or {}),
            }
            print(f"\nInitializing TradingAgents...")
            print(f" Deep Thinking LLM: {cfg['deep_think_llm']}")
            print(f" Quick Thinking LLM: {cfg['quick_think_llm']}")
            print(f" Debate Rounds: {cfg['max_debate_rounds']}")
            graph = TradingAgentsGraph(
                selected_analysts=["market", "social", "news", "fundamentals"],
                debug=False,
                config=cfg
            )
            ta_backtester = TradingAgentsBacktester(graph, initial_capital, output_dir)
            ta_portfolio = ta_backtester.backtest(ticker, start_date, end_date, data)
            engine.results["TradingAgents"] = ta_portfolio
            print("\n✓ TradingAgents backtest complete")
            # Save TradingAgents actions to JSON (in consistent format with baselines)
            save_strategy_actions_to_json(ta_portfolio, "TradingAgents", ticker, start_date, end_date, output_dir)
        except Exception as e:
            print(f"\n✗ TradingAgents failed: {e}")
            import traceback
            traceback.print_exc()

    # Metrics
    _banner("STEP 4: Calculating Performance Metrics")
    all_metrics = {}
    for name, portfolio in engine.results.items():
        metrics = calculate_all_metrics(portfolio)
        all_metrics[name] = metrics
        print_metrics(metrics, name)

    # Generate cumulative returns comparison plot
    _banner("STEP 5: Generating Comparison Plot")
    try:
        results_dir = out / ticker
        comparison_plot_path = str(results_dir / "strategy_comparison.png")
        pdf_path = comparison_plot_path.replace('.png', '.pdf')
        # Render the same comparison twice: PNG first, then PDF.
        for target in (comparison_plot_path, pdf_path):
            plot_cumulative_returns_from_results(
                results_dir=str(results_dir),
                ticker=ticker,
                output_path=target
            )
        print(f"\n✓ Comparison plot saved to:")
        print(f" - {comparison_plot_path}")
        print(f" - {pdf_path}")
    except Exception as e:
        print(f"\n✗ Failed to generate comparison plot: {e}")
        import traceback
        traceback.print_exc()

    _banner("EVALUATION COMPLETE")
    print(f"\nResults saved to: {out}")
    print(f"\nDaily actions JSON files saved for:")
    for name in engine.results.keys():
        print(f"{name}")
    return engine.results, all_metrics
def main():
    """Command-line entry point: parse arguments, build the config and run the evaluation.

    When a debugpy client is attached, the command line is ignored and a
    fixed short AAPL run is executed instead.
    """
    parser = argparse.ArgumentParser(description="Run TradingAgents evaluation with baseline comparisons")
    # Required: without it ticker=None reaches load_stock_data and fails there
    # with an unrelated-looking error.
    parser.add_argument("--ticker", type=str, required=True, help="Stock ticker symbol (e.g., AAPL)")
    parser.add_argument("--start-date", type=str, required=True, help="Start date (YYYY-MM-DD)")
    parser.add_argument("--end-date", type=str, required=True, help="End date (YYYY-MM-DD)")
    parser.add_argument("--capital", type=float, default=100000, help="Initial capital (default: 100000)")
    parser.add_argument("--skip-tradingagents", action="store_true", help="Skip TradingAgents evaluation")
    parser.add_argument("--output-dir", type=str, default=None, help="Output directory for results")
    parser.add_argument("--deep-llm", type=str, default="o4-mini", help="Deep thinking LLM model")
    parser.add_argument("--quick-llm", type=str, default="gpt-4o-mini", help="Quick thinking LLM model")
    parser.add_argument("--debate-rounds", type=int, default=1, help="Number of debate rounds (default: 1)")

    # Used for debugging
    if is_debugging():
        config = DEFAULT_CONFIG.copy()
        config.update({
            "deep_think_llm": "o4-mini",
            "quick_think_llm": "gpt-4o-mini",
            "max_debate_rounds": 1,
            "max_risk_discuss_rounds": 1,
            "llm_params": {"temperature": 0.7, "top_p": 1.0, "seed": 42},
        })
        run_evaluation(
            ticker="AAPL",
            start_date="2024-01-01",
            end_date="2024-01-10",
            initial_capital=1000,
            include_tradingagents=True,
            output_dir="./evaluation_long_short/results",
            config=config
        )
        return

    # Build config
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config["deep_think_llm"] = args.deep_llm
    config["quick_think_llm"] = args.quick_llm
    config["max_debate_rounds"] = args.debate_rounds
    config["max_risk_discuss_rounds"] = args.debate_rounds
    # Build a new dict: DEFAULT_CONFIG.copy() is shallow, so updating an
    # existing nested "llm_params" in place would also change DEFAULT_CONFIG.
    config["llm_params"] = {
        **(config.get("llm_params") or {}),
        "temperature": 0,
        "top_p": 1.0,
        "seed": 42,
    }
    run_evaluation(
        ticker=args.ticker,
        start_date=args.start_date,
        end_date=args.end_date,
        initial_capital=args.capital,
        include_tradingagents=not args.skip_tradingagents,
        output_dir=args.output_dir,
        config=config
    )


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,480 @@
"""
Visualization tools for trading strategy evaluation.
Generates plots and reports for comparing TradingAgents with baseline strategies.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from typing import Dict
import warnings
import json
# Installs an "ignore" filter for all warnings as soon as this module is imported.
warnings.filterwarnings('ignore')
# Try to import seaborn for better styling (optional)
try:
    import seaborn as sns
    plt.style.use('seaborn-v0_8-darkgrid')
    sns.set_palette("husl")
    # Records whether seaborn could be imported.
    HAS_SEABORN = True
except ImportError:
    HAS_SEABORN = False
    # Use default matplotlib styling
    # NOTE(review): assumed to apply only in this no-seaborn fallback - confirm.
    plt.rcParams['figure.facecolor'] = 'white'
    plt.rcParams['axes.facecolor'] = 'white'
    plt.rcParams['axes.grid'] = True
def plot_cumulative_returns(
    results: Dict[str, pd.DataFrame],
    ticker: str,
    output_path: str = None,
    figsize: tuple = (14, 8)
) -> plt.Figure:
    """
    Draw one cumulative-return line per strategy on a shared axis.

    Portfolios without a "cumulative_return" column are left out.

    Args:
        results: Dictionary mapping strategy name to portfolio DataFrame
        ticker: Stock ticker symbol (used in the title)
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)
    Returns:
        matplotlib Figure object
    """
    fig, ax = plt.subplots(figsize=figsize)

    for strategy_name, frame in results.items():
        if "cumulative_return" not in frame.columns:
            continue
        # Growth factor -> percent gain/loss
        pct = (frame["cumulative_return"] - 1) * 100
        ax.plot(frame.index, pct, label=strategy_name, linewidth=2, alpha=0.8)

    bold = {"fontweight": "bold"}
    ax.set_xlabel('Date', fontsize=12, **bold)
    ax.set_ylabel('Cumulative Return (%)', fontsize=12, **bold)
    ax.set_title(f'{ticker} - Cumulative Returns Comparison', fontsize=14, **bold)
    ax.legend(loc='best', fontsize=10, framealpha=0.9)
    ax.grid(True, alpha=0.3)
    # Break-even reference line
    ax.axhline(y=0, color='black', linestyle='--', linewidth=1, alpha=0.5)

    def _as_percent(y, _pos):
        return f'{y:.1f}%'

    # Format y-axis as percentage
    ax.yaxis.set_major_formatter(plt.FuncFormatter(_as_percent))
    plt.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"✓ Saved cumulative returns plot to: {output_path}")
    return fig
def plot_transaction_history(
    portfolio: pd.DataFrame,
    ticker: str,
    strategy_name: str = "TradingAgents",
    output_path: str = None,
    figsize: tuple = (14, 10)
) -> plt.Figure:
    """
    Plot transaction history with buy/sell signals overlaid on price chart.

    Args:
        portfolio: Portfolio DataFrame with 'close' and 'portfolio_value'
            columns plus the daily actions in a 'signal' column or, as
            produced by the backtests in this package, an 'action' column
        ticker: Stock ticker symbol
        strategy_name: Name of the strategy
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)
    Returns:
        matplotlib Figure object
    Raises:
        KeyError: If neither a 'signal' nor an 'action' column is present
    """
    # 'signal' keeps precedence for existing callers; the backtest portfolios
    # only carry 'action', which previously made this function fail.
    if "signal" in portfolio.columns:
        signals = portfolio["signal"]
    elif "action" in portfolio.columns:
        signals = portfolio["action"]
    else:
        raise KeyError("portfolio needs a 'signal' or 'action' column")

    # gridspec_kw form instead of a bare height_ratios keyword, which older
    # matplotlib releases do not accept.
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize, gridspec_kw={"height_ratios": [2, 1]})

    # Price chart with signals
    ax1.plot(portfolio.index, portfolio["close"], label='Close Price',
             color='blue', linewidth=1.5, alpha=0.7)

    # Mark only the first day of each run of identical signals
    buy_signals = (signals == 1) & (signals.shift(1) != 1)
    sell_signals = (signals == -1) & (signals.shift(1) != -1)

    # Plot buy/sell markers
    if buy_signals.any():
        ax1.scatter(portfolio.index[buy_signals],
                    portfolio.loc[buy_signals, "close"],
                    marker='^', color='green', s=100, label='Buy',
                    zorder=5, alpha=0.8)
    if sell_signals.any():
        ax1.scatter(portfolio.index[sell_signals],
                    portfolio.loc[sell_signals, "close"],
                    marker='v', color='red', s=100, label='Sell',
                    zorder=5, alpha=0.8)
    ax1.set_ylabel('Price ($)', fontsize=12, fontweight='bold')
    ax1.set_title(f'{ticker} - {strategy_name} Transaction History',
                  fontsize=14, fontweight='bold')
    ax1.legend(loc='best', fontsize=10)
    ax1.grid(True, alpha=0.3)

    # Portfolio value
    ax2.plot(portfolio.index, portfolio["portfolio_value"],
             label='Portfolio Value', color='purple', linewidth=2)
    ax2.fill_between(portfolio.index, portfolio["portfolio_value"],
                     alpha=0.3, color='purple')
    ax2.set_xlabel('Date', fontsize=12, fontweight='bold')
    ax2.set_ylabel('Portfolio Value ($)', fontsize=12, fontweight='bold')
    ax2.legend(loc='best', fontsize=10)
    ax2.grid(True, alpha=0.3)
    # Format y-axis as currency
    ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'${y:,.0f}'))
    plt.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"✓ Saved transaction history plot to: {output_path}")
    return fig
def plot_metrics_comparison(
    comparison_df: pd.DataFrame,
    ticker: str,
    output_path: str = None,
    figsize: tuple = (16, 10)
) -> plt.Figure:
    """
    Create horizontal bar charts comparing key metrics across strategies.

    One subplot is drawn per available metric on a fixed 2x2 grid. Within a
    subplot the strategies are sorted by that metric in descending order, and
    the row labelled 'TradingAgents' is drawn in a different colour.

    Args:
        comparison_df: DataFrame with strategies as rows and metrics as columns
        ticker: Stock ticker symbol (used in the figure title)
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object

    Raises:
        ValueError: If none of the expected metric columns is present.
    """
    # Select key metrics (matching paper's Table 1)
    metrics_to_plot = [
        "Cumulative Return (%)",
        "Annualized Return (%)",
        "Sharpe Ratio",
        "Maximum Drawdown (%)"
    ]

    # Filter to available metrics
    available_metrics = [m for m in metrics_to_plot if m in comparison_df.columns]
    if not available_metrics:
        raise ValueError("No matching metrics found in comparison DataFrame")

    fig, axes = plt.subplots(2, 2, figsize=figsize)
    axes = axes.flatten()

    for ax, metric in zip(axes, available_metrics):
        data = comparison_df[metric].sort_values(ascending=False)

        # Color code: TradingAgents in different color
        colors = ['#FF6B6B' if name == 'TradingAgents' else '#4ECDC4'
                  for name in data.index]

        bars = ax.barh(range(len(data)), data.values, color=colors, alpha=0.8)
        ax.set_yticks(range(len(data)))
        ax.set_yticklabels(data.index, fontsize=10)
        ax.set_xlabel(metric, fontsize=11, fontweight='bold')
        ax.set_title(metric, fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='x')
        # Leave horizontal room so the value labels stay inside the axes
        # instead of running over the frame or the strategy names.
        ax.margins(x=0.15)

        # Ratios are unit-less; every other plotted metric is a percentage.
        is_ratio = "Ratio" in metric

        # Add value labels at the outer end of each bar
        for bar, value in zip(bars, data.values):
            label = f'{value:.2f}' if is_ratio else f'{value:.1f}%'
            y_center = bar.get_y() + bar.get_height() / 2
            if value < 0:
                # A negative bar grows leftwards from zero, so its outer end
                # is on the left: right-align the text to keep it off the bar.
                ax.text(value, y_center, f'{label} ', va='center',
                        ha='right', fontsize=9)
            else:
                ax.text(value, y_center, f' {label}', va='center',
                        ha='left', fontsize=9)

    # Hide unused subplots
    for unused_ax in axes[len(available_metrics):]:
        unused_ax.axis('off')

    fig.suptitle(f'{ticker} - Performance Metrics Comparison',
                 fontsize=16, fontweight='bold', y=0.995)
    plt.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"✓ Saved metrics comparison plot to: {output_path}")

    return fig
def plot_drawdown(
    results: Dict[str, pd.DataFrame],
    ticker: str,
    output_path: str = None,
    figsize: tuple = (14, 8)
) -> plt.Figure:
    """
    Plot drawdown analysis for all strategies.

    For every portfolio that has a 'portfolio_value' column, the drawdown is
    the percentage distance of the value from its running maximum. Each
    strategy gets a line plus a light shaded area between the line and zero.
    Portfolios without that column are skipped.

    Args:
        results: Dictionary mapping strategy name to portfolio DataFrame
        ticker: Stock ticker symbol (used in the plot title)
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object
    """
    fig, ax = plt.subplots(figsize=figsize)

    for name, portfolio in results.items():
        if "portfolio_value" not in portfolio.columns:
            continue

        values = portfolio["portfolio_value"]
        running_max = values.cummax()
        drawdown = (values - running_max) / running_max * 100

        (line,) = ax.plot(portfolio.index, drawdown, label=name,
                          linewidth=2, alpha=0.7)
        # Shade with the line's own colour so the two always match, rather
        # than relying on a second colour cycle advancing in step.
        ax.fill_between(portfolio.index, drawdown, 0,
                        facecolor=line.get_color(), alpha=0.1)

    ax.set_xlabel('Date', fontsize=12, fontweight='bold')
    ax.set_ylabel('Drawdown (%)', fontsize=12, fontweight='bold')
    ax.set_title(f'{ticker} - Drawdown Analysis', fontsize=14, fontweight='bold')
    ax.legend(loc='best', fontsize=10, framealpha=0.9)
    ax.grid(True, alpha=0.3)
    ax.axhline(y=0, color='black', linestyle='--', linewidth=1, alpha=0.5)

    # Format y-axis as percentage
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y:.1f}%'))

    plt.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"✓ Saved drawdown plot to: {output_path}")

    return fig
def plot_returns_distribution(
    results: Dict[str, pd.DataFrame],
    ticker: str,
    output_path: str = None,
    figsize: tuple = (14, 8)
) -> plt.Figure:
    """
    Draw overlaid density histograms of each strategy's daily returns.

    Only portfolios that contain a 'strategy_return' column are plotted.
    Missing values are dropped and the remaining returns are multiplied by
    100 before binning.

    Args:
        results: Dictionary mapping strategy name to portfolio DataFrame
        ticker: Stock ticker symbol (used in the plot title)
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object
    """
    fig, ax = plt.subplots(figsize=figsize)

    for strategy, frame in results.items():
        if "strategy_return" not in frame.columns:
            continue
        # Express returns in percent so they match the x-axis label.
        pct_returns = frame["strategy_return"].dropna() * 100
        ax.hist(pct_returns, bins=50, alpha=0.5, label=strategy, density=True)

    axis_label_style = {"fontsize": 12, "fontweight": "bold"}
    ax.set_xlabel('Daily Return (%)', **axis_label_style)
    ax.set_ylabel('Density', **axis_label_style)
    ax.set_title(f'{ticker} - Returns Distribution', fontsize=14, fontweight='bold')
    ax.legend(loc='best', fontsize=10)
    ax.grid(True, alpha=0.3)
    # Dashed vertical reference line at a zero return.
    ax.axvline(x=0, color='black', linestyle='--', linewidth=1, alpha=0.5)

    plt.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"✓ Saved returns distribution plot to: {output_path}")

    return fig
def create_summary_report(
    ticker: str,
    results: Dict[str, pd.DataFrame],
    comparison_df: pd.DataFrame,
    output_dir: str
) -> None:
    """
    Generate comprehensive visual summary report.

    Creates all standard plots and saves them as PNG files in the output
    directory (created if missing). Each plot is best-effort: a failure is
    reported on stdout and the remaining plots are still attempted. Figures
    opened while rendering are closed again once the plot has been attempted,
    so repeated reports do not accumulate open pyplot figures.

    Args:
        ticker: Stock ticker symbol (also used as the file name prefix)
        results: Dictionary mapping strategy name to portfolio DataFrame
        comparison_df: DataFrame with performance metrics comparison
        output_dir: Directory to save output files
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    print("\nGenerating visualizations...")

    failures = 0

    def _render(description, plot_fn, *args, filename, **kwargs):
        """Run one plot function, report failure, and close its figures."""
        nonlocal failures
        open_before = set(plt.get_fignums())
        try:
            plot_fn(*args, output_path=str(output_path / filename), **kwargs)
        except Exception as e:
            # Deliberately broad: one broken plot must not stop the report.
            failures += 1
            print(f"✗ Failed to generate {description} plot: {e}")
        finally:
            # Close whatever this call opened, including a figure left
            # behind by a plot function that raised before returning it.
            for fig_num in set(plt.get_fignums()) - open_before:
                plt.close(fig_num)

    # 1. Cumulative Returns
    _render("cumulative returns", plot_cumulative_returns,
            results, ticker,
            filename=f"{ticker}_cumulative_returns.png")

    # 2. Metrics Comparison
    _render("metrics comparison", plot_metrics_comparison,
            comparison_df, ticker,
            filename=f"{ticker}_metrics_comparison.png")

    # 3. Drawdown Analysis
    _render("drawdown", plot_drawdown,
            results, ticker,
            filename=f"{ticker}_drawdown.png")

    # 4. Transaction History (if TradingAgents results available)
    if "TradingAgents" in results:
        _render("transaction history", plot_transaction_history,
                results["TradingAgents"], ticker,
                filename=f"{ticker}_TradingAgents_transactions.png",
                strategy_name="TradingAgents")

    # 5. Returns Distribution
    _render("returns distribution", plot_returns_distribution,
            results, ticker,
            filename=f"{ticker}_returns_distribution.png")

    if failures:
        # Do not claim full success when some plots were not produced.
        print(f"\n⚠ {failures} visualization(s) failed; "
              f"remaining plots saved to: {output_dir}")
    else:
        print(f"\n✓ All visualizations saved to: {output_dir}")
def plot_cumulative_returns_from_results(
    results_dir: str,
    ticker: str,
    output_path: str = None,
    figsize: tuple = (12, 7)
) -> plt.Figure:
    """
    Plot cumulative returns comparison from saved JSON results.

    For each known strategy folder under ``results_dir`` one
    ``actions_*.json`` file is read. The file must hold an 'actions' list
    whose entries carry 'date' and 'cumulative_return'. Missing folders and
    folders without a matching file are skipped silently; a file that cannot
    be read or parsed produces a warning on stdout and is skipped.

    Args:
        results_dir: Directory containing strategy result folders
        ticker: Stock ticker symbol (used in the plot title)
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object
    """
    results_path = Path(results_dir)

    # Strategies to load: folder name on disk -> label shown in the legend
    strategies = {
        'BuyAndHold': 'BuyAndHoldStrategy',
        'MACD': 'MACDStrategy',
        'KDJ&RSI': 'KDJRSIStrategy',
        'ZMR': 'ZMRStrategy',
        'SMA': 'SMAStrategy',
        'TradingAgents': 'TradingAgents'
    }

    fig, ax = plt.subplots(figsize=figsize)

    # Load and plot each strategy
    for folder_name, display_name in strategies.items():
        strategy_dir = results_path / folder_name
        if not strategy_dir.exists():
            continue

        # glob() yields files in arbitrary order; sort so that the same file
        # (lexicographically first) is chosen on every run and platform.
        action_files = sorted(strategy_dir.glob("actions_*.json"))
        if not action_files:
            continue

        try:
            # Explicit encoding: do not depend on the platform default.
            with open(action_files[0], 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Extract date and cumulative_return
            actions = data['actions']
            dates = pd.to_datetime([action['date'] for action in actions])
            cumulative_returns = [action['cumulative_return'] for action in actions]

            # Emphasise the TradingAgents curve with a thicker line
            linewidth = 2.5 if display_name == 'TradingAgents' else 1.5
            ax.plot(dates, cumulative_returns, label=display_name,
                    linewidth=linewidth, alpha=0.9)
        except Exception as e:
            # Best-effort: one unreadable result must not abort the comparison.
            print(f"Warning: Failed to load {display_name}: {e}")

    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Cumulative Return', fontsize=12)
    ax.set_title(f'Strategy Comparison - Cumulative Returns for {ticker}',
                 fontsize=14, fontweight='bold')
    ax.legend(title='Strategies', loc='best', fontsize=10, framealpha=0.9)
    ax.grid(True, alpha=0.3, linestyle='--')
    # Dashed horizontal reference line at a cumulative return of 1.0
    ax.axhline(y=1.0, color='black', linestyle='--', linewidth=1, alpha=0.5)

    plt.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"✓ Saved cumulative returns comparison to: {output_path}")

    return fig
if __name__ == "__main__":
    # Running the module directly only lists the plotting helpers by name.
    plotter_names = (
        "plot_cumulative_returns",
        "plot_cumulative_returns_from_results",
        "plot_transaction_history",
        "plot_metrics_comparison",
        "plot_drawdown",
        "plot_returns_distribution",
        "create_summary_report",
    )
    print("Visualization module loaded successfully!")
    print("\nAvailable functions:")
    for plotter_name in plotter_names:
        print(f" - {plotter_name}")