From 7df8ae203f6ae4c43d03e094ef17080f91627e83 Mon Sep 17 00:00:00 2001 From: Zichen Liu Date: Fri, 7 Nov 2025 16:59:55 -0600 Subject: [PATCH] change to longShort --- evaluation_long_short/__init__.py | 66 +++ evaluation_long_short/backtest.py | 251 ++++++++++ evaluation_long_short/baseline_strategies.py | 185 +++++++ evaluation_long_short/metrics.py | 116 +++++ evaluation_long_short/run_evaluation.py | 273 +++++++++++ evaluation_long_short/visualize.py | 480 +++++++++++++++++++ 6 files changed, 1371 insertions(+) create mode 100644 evaluation_long_short/__init__.py create mode 100644 evaluation_long_short/backtest.py create mode 100644 evaluation_long_short/baseline_strategies.py create mode 100644 evaluation_long_short/metrics.py create mode 100644 evaluation_long_short/run_evaluation.py create mode 100644 evaluation_long_short/visualize.py diff --git a/evaluation_long_short/__init__.py b/evaluation_long_short/__init__.py new file mode 100644 index 00000000..58590254 --- /dev/null +++ b/evaluation_long_short/__init__.py @@ -0,0 +1,66 @@ +from .baseline_strategies import ( + BuyAndHoldStrategy, + MACDStrategy, + KDJRSIStrategy, + ZMRStrategy, + SMAStrategy, + get_all_baseline_strategies +) + +from .metrics import ( + calculate_cumulative_return, + calculate_annualized_return, + calculate_sharpe_ratio, + calculate_maximum_drawdown, + calculate_all_metrics, + create_comparison_table +) + +from .backtest import ( + BacktestEngine, + TradingAgentsBacktester, + load_stock_data +) + +from .visualize import ( + plot_cumulative_returns, + plot_transaction_history, + plot_metrics_comparison, + plot_drawdown, + create_summary_report +) + +from .run_evaluation import run_evaluation + +__all__ = [ + # Strategies + 'BuyAndHoldStrategy', + 'MACDStrategy', + 'KDJRSIStrategy', + 'ZMRStrategy', + 'SMAStrategy', + 'get_all_baseline_strategies', + + # Metrics + 'calculate_cumulative_return', + 'calculate_annualized_return', + 'calculate_sharpe_ratio', + 'calculate_maximum_drawdown', 
+ 'calculate_all_metrics', + 'create_comparison_table', + + # Backtesting + 'BacktestEngine', + 'TradingAgentsBacktester', + 'load_stock_data', + + # Visualization + 'plot_cumulative_returns', + 'plot_transaction_history', + 'plot_metrics_comparison', + 'plot_drawdown', + 'create_summary_report', + + # Main evaluation + 'run_evaluation', +] \ No newline at end of file diff --git a/evaluation_long_short/backtest.py b/evaluation_long_short/backtest.py new file mode 100644 index 00000000..77347c07 --- /dev/null +++ b/evaluation_long_short/backtest.py @@ -0,0 +1,251 @@ +""" +Backtesting engine for TradingAgents and baseline strategies. + +Both TradingAgents and rule-based strategies use identical return calculation logic: + 1. Generate signals/actions: 1 (BUY), 0 (HOLD), -1 (SELL) + 2. Convert actions to positions: 1 (long), 0 (flat) + 3. Calculate returns: strategy_return = position.shift(1) * market_return + +This ensures apples-to-apples comparison across all strategies. +""" + +import pandas as pd +import numpy as np +from typing import Dict, List +from pathlib import Path +import json + + +STD_FIELDS = {"Open", "High", "Low", "Close", "Adj Close", "Volume"} + + +class TradingAgentsBacktester: + """Backtest engine for TradingAgents framework.""" + + def __init__(self, trading_agents_graph, initial_capital=100000, output_dir=None): + self.graph = trading_agents_graph + self.initial_capital = float(initial_capital) + self.name = "TradingAgents" + self.output_dir = output_dir + + def backtest(self, ticker: str, start_date: str, end_date: str, data: pd.DataFrame) -> pd.DataFrame: + """ + Backtest TradingAgents using the same return calculation logic as rule-based strategies. + + Process: + 1. Collect signals (actions: 1=BUY, 0=HOLD, -1=SELL) for all dates + 2. Convert actions to positions (0=flat, 1=long) using same logic as baselines + 3. 
Calculate returns as: strategy_return = position.shift(1) * market_return + """ + # Restrict to window + df = data.loc[start_date:end_date].copy() + + decisions: List[Dict] = [] + signals = pd.Series(0, index=df.index, dtype=float) + + print(f"\nRunning TradingAgents backtest on {ticker} from {start_date} to {end_date}") + print(f"Total trading days: {len(df)}") + print("-" * 80) + + # Step 1: Collect all signals/decisions + for i, (date, row) in enumerate(df.iterrows()): + date_str = date.strftime("%Y-%m-%d") + price = float(row["Close"]) + + # Get decision from TradingAgents graph + try: + print(f"\n[{i+1}/{len(df)}] {date_str} ... ", end="") + final_state, decision = self.graph.propagate(ticker, date_str) + print(f"Decision: {decision}") + signal = self._parse_decision(decision) + decisions.append({"date": date_str, "decision": decision, "signal": signal, "price": price}) + + except Exception as e: + print(f"Error: {e}") + signal = 0 + decisions.append({"date": date_str, "decision": "ERROR", "signal": 0, "price": price, "error": str(e)}) + + signals.loc[date] = signal + + # Step 2: Convert actions to positions (same logic as baseline strategies) + position = self._actions_to_position(signals) + + # Step 3: Calculate returns using standardized logic + close = pd.to_numeric(df["Close"], errors="coerce") + market_ret = close.pct_change().fillna(0.0) + exposure = position.shift(1).fillna(0.0) # Yesterday's position determines today's exposure + strat_ret = (exposure * market_ret).astype(float) + + cumret = (1.0 + strat_ret).cumprod() + portval = self.initial_capital * cumret + + # Build portfolio DataFrame with same structure as baseline strategies + portfolio = pd.DataFrame(index=df.index) + portfolio["action"] = signals # 1=BUY, 0=HOLD, -1=SELL + portfolio["position"] = position # 1=long, 0=flat + portfolio["close"] = close + if "Volume" in df.columns: + vol = df["Volume"] + if isinstance(vol, pd.DataFrame) and vol.shape[1] == 1: + vol = vol.iloc[:, 0] + if 
isinstance(vol, pd.Series): + portfolio["Volume"] = vol + portfolio["market_return"] = market_ret + portfolio["strategy_return"] = strat_ret + portfolio["cumulative_return"] = cumret + portfolio["portfolio_value"] = portval + portfolio["trade_delta"] = portfolio["position"].diff().fillna(0.0) # +1=buy, -1=sell + + self._save_decisions_log(ticker, decisions, start_date, end_date) + return portfolio + + @staticmethod + def _actions_to_position(actions: pd.Series) -> pd.Series: + """ + Convert action series to a long-only position series in {0,1}. + Same logic as baseline strategies for consistency. + """ + a = actions.astype(float).fillna(0.0).clip(-1, 1).values + pos = np.zeros_like(a, dtype=float) + for i in range(len(a)): + if i == 0: + pos[i] = 1.0 if a[i] > 0 else 0.0 + else: + if a[i] > 0: # buy → long + pos[i] = 1.0 + elif a[i] < 0: # sell → flat + pos[i] = 0.0 + else: # hold → keep previous + pos[i] = pos[i-1] + return pd.Series(pos, index=actions.index, name="position") + + def _parse_decision(self, decision: str) -> int: + """ + Parse decision to signal. 
+ We interpret: + - contains 'BUY' or 'LONG' -> 1 + - contains 'SELL' or 'EXIT' -> -1 (we use -1 as 'close to cash' here) + - otherwise HOLD -> 0 + """ + d = str(decision).upper() + if "BUY" in d or "LONG" in d: + return 1 + if "SELL" in d or "EXIT" in d or "CLOSE" in d: + return -1 + return 0 + + def _save_decisions_log(self, ticker: str, decisions: List[Dict], start_date: str, end_date: str): + # Use output_dir if provided, otherwise use default + if self.output_dir: + out = Path(self.output_dir) / ticker / "TradingAgents" + else: + out = Path(f"eval_results/{ticker}/TradingAgents") + out.mkdir(parents=True, exist_ok=True) + fp = out / f"decisions_{start_date}_to_{end_date}.json" + with open(fp, "w") as f: + json.dump({ + "strategy": "TradingAgents", + "ticker": ticker, + "start_date": start_date, + "end_date": end_date, + "total_days": len(decisions), + "decisions": decisions + }, f, indent=2) + print(f" ✓ Saved TradingAgents detailed decisions to: {fp}") + + +class BacktestEngine: + """Engine to run and compare multiple strategies.""" + + def __init__(self, data: pd.DataFrame, initial_capital: float = 100000): + self.data = data + self.initial_capital = float(initial_capital) + self.results: Dict[str, pd.DataFrame] = {} + + def run_strategy(self, strategy, start_date: str = None, end_date: str = None, label = None) -> pd.DataFrame: + data_filtered = self.data.loc[start_date:end_date] if (start_date and end_date) else self.data + print(f"\nRunning {strategy.name}...") + portfolio = strategy.backtest(data_filtered) + self.results[label or strategy.name] = portfolio + return portfolio + + def run_all_strategies(self, strategies: Dict, start_date: str = None, end_date: str = None): + for name, strategy in strategies.items(): + try: + self.run_strategy(strategy, start_date, end_date) + print(f"✓ {name} completed") + except Exception as e: + print(f"✗ {name} failed: {e}") + + def get_results(self) -> Dict[str, pd.DataFrame]: + return self.results + + +def 
load_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame: + try: + import yfinance as yf + # Normalize accidental ('A','A','P','L') / ['A','A','P','L'] + if isinstance(ticker, (list, tuple)) and all(isinstance(c, str) and len(c) == 1 for c in ticker): + ticker = "".join(ticker) + + if not isinstance(ticker, str): + raise ValueError("Pass a single ticker symbol as a string, e.g., 'AAPL'.") + + df = yf.download(ticker, start=start_date, end=end_date, progress=False) + if df.empty: + raise ValueError(f"No data found for {ticker}") + return df + + except Exception as e: + print(f"Error loading data: {e}") + raise + +def standardize_single_ticker(df: pd.DataFrame, ticker: str | None = None) -> pd.DataFrame: + """Return a single-ticker OHLCV DataFrame with simple columns. + Works with yfinance single or multi-ticker outputs. + """ + df = df.copy() + + # If columns are MultiIndex (common with multi-ticker yfinance) + if isinstance(df.columns, pd.MultiIndex): + # Figure out which level is the field (Open/High/...) and which is ticker + lvl0 = set(map(str, df.columns.get_level_values(0))) + lvl1 = set(map(str, df.columns.get_level_values(1))) + if len(STD_FIELDS & lvl0) > 0: + field_level, ticker_level = 0, 1 + elif len(STD_FIELDS & lvl1) > 0: + field_level, ticker_level = 1, 0 + else: + raise ValueError("Cannot detect OHLCV field level in MultiIndex columns.") + + available = list(pd.Index(df.columns.get_level_values(ticker_level)).unique()) + + # Normalize weird ticker inputs like ('A','A','P','L') -> 'AAPL' + if isinstance(ticker, (list, tuple)) and all(isinstance(c, str) and len(c) == 1 for c in ticker): + ticker = "".join(ticker) + if ticker is None: + if len(available) != 1: + raise ValueError(f"Multi-ticker DataFrame. Pick one with ticker=..., available={available}") + ticker = available[0] + if str(ticker) not in map(str, available): + raise ValueError(f"Ticker {ticker!r} not in columns. 
Available: {available}") + + # Slice to that ticker and drop the ticker level + df = df.xs(ticker, axis=1, level=ticker_level) + + # Map Adj Close -> Close if Close missing + if "Close" not in df.columns and "Adj Close" in df.columns: + df = df.rename(columns={"Adj Close": "Close"}) + + # Final sanity + req = ["Open", "High", "Low", "Close"] + missing = [c for c in req if c not in df.columns] + if missing: + raise ValueError(f"Data missing columns: {missing}") + + # Ensure 'Close' is a Series (not 1-col DataFrame) + close = df["Close"] + if isinstance(close, pd.DataFrame) and close.shape[1] == 1: + df["Close"] = close.iloc[:, 0] + + return df \ No newline at end of file diff --git a/evaluation_long_short/baseline_strategies.py b/evaluation_long_short/baseline_strategies.py new file mode 100644 index 00000000..5e76fa30 --- /dev/null +++ b/evaluation_long_short/baseline_strategies.py @@ -0,0 +1,185 @@ +import pandas as pd +import numpy as np +from abc import ABC, abstractmethod + + +class BaseStrategy(ABC): + """Base class for trading strategies (long-only, action-based).""" + + def __init__(self, initial_capital=100000): + self.initial_capital = float(initial_capital) + self.name = self.__class__.__name__ + + def _close_series(self, data: pd.DataFrame) -> pd.Series: + close = data["Close"] + if isinstance(close, pd.DataFrame): + if close.shape[1] == 1: + close = close.iloc[:, 0] + else: + raise ValueError("Multiple 'Close' columns detected. Pass single-ticker data.") + return pd.to_numeric(close, errors="coerce") + + @abstractmethod + def generate_signals(self, data: pd.DataFrame) -> pd.Series: + """ + Generate *actions* by date: + 1 = BUY (open / go long, or stay long) + 0 = HOLD (no change) + -1 = SELL (exit to flat) + Shorting is NOT allowed. 
+ """ + pass + + def _prep_ohlcv(self, data: pd.DataFrame) -> pd.DataFrame: + req = ["Open", "High", "Low", "Close"] + for col in req: + if col not in data.columns: + raise ValueError(f"Data missing column '{col}'") + return data.copy() + + @staticmethod + def _actions_to_position(actions: pd.Series) -> pd.Series: + """Convert action series to a long-only position series in {0,1}.""" + a = actions.astype(float).fillna(0.0).clip(-1, 1).values + pos = np.zeros_like(a, dtype=float) + for i in range(len(a)): + if i == 0: + pos[i] = a[i] # origin position = signal + else: + if a[i] == 0: # HOLD + pos[i] = pos[i-1] + else: + pos[i] = a[i] # LONG or SHORT + return pd.Series(pos, index=actions.index, name="position") + + def backtest(self, data: pd.DataFrame) -> pd.DataFrame: + df = self._prep_ohlcv(data) + + # 1) get actions (1, 0, -1) + actions = self.generate_signals(df).reindex(df.index).fillna(0).clip(-1, 1).astype(float) + + # 2) map actions → long-only position {0,1} + position = self._actions_to_position(actions) + + # 3) compute returns (note: sell today → flat tomorrow → 0 return tomorrow) + close = self._close_series(df) + market_ret = close.pct_change().fillna(0.0) + exposure = position.shift(1).fillna(0.0) # use yesterday's position + strat_ret = (exposure * market_ret).astype(float) + + cumret = (1.0 + strat_ret).cumprod() + portval = self.initial_capital * cumret + + portfolio = pd.DataFrame(index=df.index) + portfolio["action"] = actions # 1 buy / 0 hold / -1 sell + portfolio["position"] = position # 1 long / 0 flat + portfolio["close"] = close + if "Volume" in df.columns: + vol = df["Volume"] + if isinstance(vol, pd.DataFrame) and vol.shape[1] == 1: + vol = vol.iloc[:, 0] + if isinstance(vol, pd.Series): + portfolio["Volume"] = vol + portfolio["market_return"] = market_ret + portfolio["strategy_return"] = strat_ret + portfolio["cumulative_return"] = cumret + portfolio["portfolio_value"] = portval + portfolio["trade_delta"] = 
portfolio["position"].diff().fillna(0.0) # +1 buy, -1 sell + return portfolio + + +class BuyAndHoldStrategy(BaseStrategy): + """Buy on day 1 and hold long (no shorting).""" + + def generate_signals(self, data: pd.DataFrame) -> pd.Series: + a = pd.Series(0.0, index=data.index) + if len(a) > 0: + a.iloc[0] = 1.0 # buy once at start + return a + + +class MACDStrategy(BaseStrategy): + """MACD(12,26,9) Contrarian, long-only:MACD>signal → SELL(退出),MACD 0] = -1.0 # 卖出/退出(之前是做空) + a[diff < 0] = 1.0 # 买入/做多 + return a + + +class KDJRSIStrategy(BaseStrategy): + """KDJ + RSI 逆势逻辑(长多-only):超买 → 卖出;超卖 → 买入""" + + def generate_signals(self, data): + df = data.copy() + + # === RSI === + delta = df["Close"].diff() + up, down = delta.clip(lower=0), -delta.clip(upper=0) + rs = up.ewm(span=14, adjust=False).mean() / down.ewm(span=14, adjust=False).mean().replace(0, np.nan) + df["rsi"] = 100 - 100 / (1 + rs) + + # === KDJ === + low = df["Low"].rolling(9).min() + high = df["High"].rolling(9).max() + denom = (high - low).replace(0, np.nan) + rsv = 100 * (df["Close"] - low) / denom + k = rsv.ewm(com=2, adjust=False).mean() + df["kdj_k"] = k + + # === Actions === + a = pd.Series(0.0, index=df.index) + # 收紧阈值:RSI>75,K>85 → 卖出;RSI<25,K<15 → 买入 + a[(df["rsi"] > 75) & (df["kdj_k"] > 85)] = -1.0 + a[(df["rsi"] < 25) & (df["kdj_k"] < 15)] = 1.0 + return a + + +class ZMRStrategy(BaseStrategy): + + def generate_signals(self, data): + close = self._close_series(data) + mean = close.rolling(50).mean() + std = close.rolling(50).std() + z = (close - mean) / std.replace(0, np.nan) + + a = pd.Series(0.0, index=data.index) + a[z > 1.3] = -1.0 # 高估 → 卖出/退出 + a[z < -1.3] = 1.0 # 低估 → 买入/做多 + return a + + +class SMAStrategy(BaseStrategy): + + def __init__(self, initial_capital=100000, short_window=5, long_window=20): + super().__init__(initial_capital) + self.short_window = int(short_window) + self.long_window = int(long_window) + + def generate_signals(self, data: pd.DataFrame) -> pd.Series: + close = 
self._close_series(data) + short = close.rolling(window=self.short_window, min_periods=self.short_window).mean() + long_ = close.rolling(window=self.long_window, min_periods=self.long_window).mean() + a = pd.Series(0.0, index=data.index) + a[short > long_] = 1.0 + a[short < long_] = -1.0 + return a + + +def get_all_baseline_strategies(initial_capital=100000): + """Get all baseline strategies for comparison (long-only, action-based).""" + return { + "BuyAndHold": BuyAndHoldStrategy(initial_capital), + "MACD": MACDStrategy(initial_capital), + "KDJ&RSI": KDJRSIStrategy(initial_capital), + "ZMR": ZMRStrategy(initial_capital), + "SMA": SMAStrategy(initial_capital), + } diff --git a/evaluation_long_short/metrics.py b/evaluation_long_short/metrics.py new file mode 100644 index 00000000..2f2c2b4f --- /dev/null +++ b/evaluation_long_short/metrics.py @@ -0,0 +1,116 @@ +""" +Evaluation metrics for trading strategies. +Implements: Cumulative Return, Annualized Return, Sharpe Ratio, Maximum Drawdown +""" + +import pandas as pd +import numpy as np +from typing import Dict + + +def _require_cols(df: pd.DataFrame, cols): + missing = [c for c in cols if c not in df.columns] + if missing: + raise ValueError(f"Portfolio missing columns: {missing}") + + +def calculate_cumulative_return(portfolio: pd.DataFrame) -> float: + """CR% = (V_end / V_start - 1) * 100""" + _require_cols(portfolio, ["portfolio_value"]) + v_start = float(portfolio["portfolio_value"].iloc[0]) + v_end = float(portfolio["portfolio_value"].iloc[-1]) + if v_start <= 0: + return 0.0 + return (v_end / v_start - 1.0) * 100.0 + + +def calculate_annualized_return(portfolio: pd.DataFrame, trading_days: int | None = None) -> float: + """AR% = ((V_end / V_start) ** (1/years) - 1) * 100 with 252 trading days/year.""" + _require_cols(portfolio, ["portfolio_value"]) + v_start = float(portfolio["portfolio_value"].iloc[0]) + v_end = float(portfolio["portfolio_value"].iloc[-1]) + if v_start <= 0 or v_end <= 0: + return 0.0 + if 
trading_days is None: + trading_days = len(portfolio) + years = trading_days / 252.0 + if years <= 0: + return 0.0 + return ((v_end / v_start) ** (1.0 / years) - 1.0) * 100.0 + + +def calculate_sharpe_ratio(portfolio: pd.DataFrame, risk_free_rate: float = 0.02) -> float: + """ + SR = (E[r] - r_f) / stdev(r), where r are *daily* strategy returns, + annualized using 252 trading days (paper S1.2.3). + """ + _require_cols(portfolio, ["strategy_return"]) + r = portfolio["strategy_return"].dropna().astype(float) + if len(r) < 2 or r.std() == 0: + return 0.0 + mean_ann = r.mean() * 252.0 + std_ann = r.std(ddof=1) * np.sqrt(252.0) + if std_ann == 0: + return 0.0 + return (mean_ann - risk_free_rate) / std_ann + + +def calculate_maximum_drawdown(portfolio: pd.DataFrame) -> float: + """MDD% = max drawdown on portfolio_value (peak->trough) * 100""" + _require_cols(portfolio, ["portfolio_value"]) + values = portfolio["portfolio_value"].astype(float) + running_max = values.cummax() + drawdown = (values - running_max) / running_max + return float(drawdown.min() * -100.0) + + +def calculate_win_rate(portfolio: pd.DataFrame) -> float: + """% days where strategy_return > 0""" + _require_cols(portfolio, ["strategy_return"]) + r = portfolio["strategy_return"].dropna() + if len(r) == 0: + return 0.0 + return 100.0 * (r > 0).sum() / len(r) + + +def calculate_profit_factor(portfolio: pd.DataFrame) -> float: + """Gross profit / gross loss on daily returns (informative extra metric).""" + _require_cols(portfolio, ["strategy_return"]) + r = portfolio["strategy_return"].dropna() + gp = r[r > 0].sum() + gl = -r[r < 0].sum() + if gl == 0: + return float("inf") if gp > 0 else 0.0 + return float(gp / gl) + + +def calculate_all_metrics(portfolio: pd.DataFrame, risk_free_rate: float = 0.02) -> Dict[str, float]: + return { + "Cumulative Return (%)": calculate_cumulative_return(portfolio), + "Annualized Return (%)": calculate_annualized_return(portfolio), + "Sharpe Ratio": 
calculate_sharpe_ratio(portfolio, risk_free_rate), + "Maximum Drawdown (%)": calculate_maximum_drawdown(portfolio), + # Extras (not in table but handy) + "Win Rate (%)": calculate_win_rate(portfolio), + "Profit Factor": calculate_profit_factor(portfolio), + } + + +def print_metrics(metrics: Dict[str, float], strategy_name: str = "Strategy"): + print(f"\n{'='*60}") + print(f"{strategy_name} Performance Metrics") + print(f"{'='*60}") + for k, v in metrics.items(): + if "Ratio" in k or "Factor" in k: + print(f"{k:30s}: {v:8.2f}") + else: + print(f"{k:30s}: {v:8.2f}%") + print(f"{'='*60}\n") + + +def create_comparison_table(all_metrics: Dict[str, Dict[str, float]]) -> pd.DataFrame: + df = pd.DataFrame(all_metrics).T + df = df.round(2) + if "Sharpe Ratio" in df.columns: + df = df.sort_values("Sharpe Ratio", ascending=False) + return df diff --git a/evaluation_long_short/run_evaluation.py b/evaluation_long_short/run_evaluation.py new file mode 100644 index 00000000..63ea077e --- /dev/null +++ b/evaluation_long_short/run_evaluation.py @@ -0,0 +1,273 @@ +""" +Main evaluation script to run backtesting and generate results. +Evaluates TradingAgents against baseline strategies for a single ticker. 
+""" + +import argparse +import sys +from pathlib import Path +from datetime import datetime +import pandas as pd +import json + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from evaluation_long_short.baseline_strategies import get_all_baseline_strategies +from evaluation_long_short.backtest import BacktestEngine, TradingAgentsBacktester, load_stock_data, standardize_single_ticker +from evaluation_long_short.metrics import calculate_all_metrics, create_comparison_table, print_metrics +from evaluation_long_short.visualize import plot_cumulative_returns_from_results + +from tradingagents.graph.trading_graph import TradingAgentsGraph +from tradingagents.default_config import DEFAULT_CONFIG + +def is_debugging() -> bool: + try: + import debugpy + return debugpy.is_client_connected() + except Exception: + return False + + +def save_strategy_actions_to_json( + portfolio: pd.DataFrame, + strategy_name: str, + ticker: str, + start_date: str, + end_date: str, + output_dir: str +) -> None: + """ + Save daily actions from a strategy to a JSON file. + + Args: + portfolio: Portfolio DataFrame with action, position, close, etc. 
+ strategy_name: Name of the strategy + ticker: Stock ticker symbol + start_date: Start date of backtest + end_date: End date of backtest + output_dir: Directory to save the JSON file + """ + out = Path(output_dir) / ticker / strategy_name + out.mkdir(parents=True, exist_ok=True) + + # Build actions list with relevant daily info + actions = [] + for date, row in portfolio.iterrows(): + date_str = date.strftime("%Y-%m-%d") + action_record = { + "date": date_str, + "action": int(row["action"]) if pd.notna(row["action"]) else 0, # 1=BUY, 0=HOLD, -1=SELL + "position": int(row["position"]) if pd.notna(row["position"]) else 0, # 1=long, 0=flat + "close_price": float(row["close"]) if pd.notna(row["close"]) else None, + "portfolio_value": float(row["portfolio_value"]) if pd.notna(row["portfolio_value"]) else None, + "strategy_return": float(row["strategy_return"]) if pd.notna(row["strategy_return"]) else 0.0, + "cumulative_return": float(row["cumulative_return"]) if pd.notna(row["cumulative_return"]) else 1.0 + } + actions.append(action_record) + + # Save to JSON + fp = out / f"actions_{start_date}_to_{end_date}.json" + with open(fp, "w") as f: + json.dump({ + "strategy": strategy_name, + "ticker": ticker, + "start_date": start_date, + "end_date": end_date, + "total_days": len(actions), + "actions": actions + }, f, indent=2) + + print(f" ✓ Saved {strategy_name} actions to: {fp}") + + +def run_evaluation( + ticker: str, + start_date: str, + end_date: str, + initial_capital: float = 100000, + include_tradingagents: bool = True, + output_dir: str = None, + config: dict = None +): + """ + Run complete evaluation: baselines + TradingAgents for a single ticker. 
+ """ + print(f"\n{'='*80}") + print(f"EVALUATION: {ticker} from {start_date} to {end_date}") + print(f"Initial Capital: ${initial_capital:,.2f}") + print(f"{'='*80}\n") + + # Output dir + if output_dir is None: + output_dir = f"eval_results/{ticker}/{datetime.now().strftime('%Y%m%d_%H%M%S')}" + out = Path(output_dir) + out.mkdir(parents=True, exist_ok=True) + + # Load data + print("\n" + "="*80) + print("STEP 1: Loading Stock Data") + print("="*80) + data = load_stock_data(ticker, start_date, end_date) + data = standardize_single_ticker(data, ticker) + + # Backtest engine + engine = BacktestEngine(data, initial_capital) + + # Baselines + print("\n" + "="*80) + print("STEP 2: Running Baseline Strategies") + print("="*80) + baselines = get_all_baseline_strategies(initial_capital) + + for name, strategy in baselines.items(): + try: + print(f"\nRunning {name}...", end=" ") + portfolio = engine.run_strategy(strategy, start_date, end_date) + print("✓ Complete") + # Save actions to JSON + save_strategy_actions_to_json(portfolio, name, ticker, start_date, end_date, output_dir) + except Exception as e: + print(f"✗ Failed: {e}") + + # TradingAgents + if include_tradingagents: + print("\n" + "="*80) + print("STEP 3: Running TradingAgents") + print("="*80) + try: + cfg = (config or DEFAULT_CONFIG).copy() + # Fast eval defaults (you can override from CLI) + cfg["deep_think_llm"] = cfg.get("deep_think_llm", "o4-mini") + cfg["quick_think_llm"] = cfg.get("quick_think_llm", "gpt-4o-mini") + cfg["max_debate_rounds"] = cfg.get("max_debate_rounds", 1) + cfg["max_risk_discuss_rounds"] = cfg.get("max_risk_discuss_rounds", 1) + # Deterministic-ish decoding for reproducibility + cfg.setdefault("llm_params", {}).update({"temperature": 0.7, "top_p": 1.0, "seed": 42}) + + print(f"\nInitializing TradingAgents...") + print(f" Deep Thinking LLM: {cfg['deep_think_llm']}") + print(f" Quick Thinking LLM: {cfg['quick_think_llm']}") + print(f" Debate Rounds: {cfg['max_debate_rounds']}") + + graph = 
TradingAgentsGraph( + selected_analysts=["market", "social", "news", "fundamentals"], + debug=False, + config=cfg + ) + ta_backtester = TradingAgentsBacktester(graph, initial_capital, output_dir) + ta_portfolio = ta_backtester.backtest(ticker, start_date, end_date, data) + + engine.results["TradingAgents"] = ta_portfolio + print("\n✓ TradingAgents backtest complete") + + # Save TradingAgents actions to JSON (in consistent format with baselines) + save_strategy_actions_to_json(ta_portfolio, "TradingAgents", ticker, start_date, end_date, output_dir) + + except Exception as e: + print(f"\n✗ TradingAgents failed: {e}") + import traceback + traceback.print_exc() + + # Metrics + print("\n" + "="*80) + print("STEP 4: Calculating Performance Metrics") + print("="*80) + all_metrics = {} + for name, portfolio in engine.results.items(): + metrics = calculate_all_metrics(portfolio) + all_metrics[name] = metrics + print_metrics(metrics, name) + + # Generate cumulative returns comparison plot + print("\n" + "="*80) + print("STEP 5: Generating Comparison Plot") + print("="*80) + try: + comparison_plot_path = str(out / ticker / "strategy_comparison.png") + plot_cumulative_returns_from_results( + results_dir=str(out / ticker), + ticker=ticker, + output_path=comparison_plot_path + ) + # Also save as PDF + pdf_path = comparison_plot_path.replace('.png', '.pdf') + plot_cumulative_returns_from_results( + results_dir=str(out / ticker), + ticker=ticker, + output_path=pdf_path + ) + print(f"\n✓ Comparison plot saved to:") + print(f" - {comparison_plot_path}") + print(f" - {pdf_path}") + except Exception as e: + print(f"\n✗ Failed to generate comparison plot: {e}") + import traceback + traceback.print_exc() + + print("\n" + "="*80) + print("EVALUATION COMPLETE") + print("="*80) + print(f"\nResults saved to: {out}") + print(f"\nDaily actions JSON files saved for:") + for name in engine.results.keys(): + print(f" ✓ {name}") + + return engine.results, all_metrics + + +def main(): + parser = 
argparse.ArgumentParser(description="Run TradingAgents evaluation with baseline comparisons") + parser.add_argument("--ticker", type=str, help="Stock ticker symbol (e.g., AAPL)") + parser.add_argument("--start-date", type=str, required=True, help="Start date (YYYY-MM-DD)") + parser.add_argument("--end-date", type=str, required=True, help="End date (YYYY-MM-DD)") + parser.add_argument("--capital", type=float, default=100000, help="Initial capital (default: 100000)") + parser.add_argument("--skip-tradingagents", action="store_true", help="Skip TradingAgents evaluation") + parser.add_argument("--output-dir", type=str, default=None, help="Output directory for results") + parser.add_argument("--deep-llm", type=str, default="o4-mini", help="Deep thinking LLM model") + parser.add_argument("--quick-llm", type=str, default="gpt-4o-mini", help="Quick thinking LLM model") + parser.add_argument("--debate-rounds", type=int, default=1, help="Number of debate rounds (default: 1)") + + # Used for debugging + + if is_debugging(): + config = DEFAULT_CONFIG.copy() + config.update({ + "deep_think_llm": "o4-mini", + "quick_think_llm": "gpt-4o-mini", + "max_debate_rounds": 1, + "max_risk_discuss_rounds": 1, + "llm_params": {"temperature": 0.7, "top_p": 1.0, "seed": 42}, + }) + run_evaluation( + ticker="AAPL", + start_date="2024-01-01", + end_date="2024-01-10", + initial_capital=1000, + include_tradingagents=True, + output_dir="./evaluation_long_short/results", + config=config + ) + return + + # Build config + args = parser.parse_args() + config = DEFAULT_CONFIG.copy() + config["deep_think_llm"] = args.deep_llm + config["quick_think_llm"] = args.quick_llm + config["max_debate_rounds"] = args.debate_rounds + config["max_risk_discuss_rounds"] = args.debate_rounds + config.setdefault("llm_params", {}).update({"temperature": 0, "top_p": 1.0, "seed": 42}) + + run_evaluation( + ticker=args.ticker, + start_date=args.start_date, + end_date=args.end_date, + initial_capital=args.capital, + 
"""
Visualization tools for trading strategy evaluation.
Generates plots and reports for comparing TradingAgents with baseline strategies.
"""

import json
import warnings
from pathlib import Path
from typing import Callable, Dict, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# NOTE(review): this suppresses *all* warnings process-wide as an import side
# effect. Kept as-is so that importing the package behaves exactly as before.
warnings.filterwarnings('ignore')

# Try to import seaborn for better styling (optional)
try:
    import seaborn as sns
except ImportError:
    HAS_SEABORN = False
    # Use default matplotlib styling
    plt.rcParams['figure.facecolor'] = 'white'
    plt.rcParams['axes.facecolor'] = 'white'
    plt.rcParams['axes.grid'] = True
else:
    HAS_SEABORN = True
    try:
        plt.style.use('seaborn-v0_8-darkgrid')
    except OSError:
        # The style sheet is missing under this name on some matplotlib
        # releases; fall back to its pre-rename name instead of making the
        # whole module un-importable.
        plt.style.use('seaborn-darkgrid')
    sns.set_palette("husl")


def _percent_formatter() -> plt.FuncFormatter:
    """Return a fresh tick formatter rendering values as ``12.3%``.

    A new instance is built per call because a formatter is bound to the
    axis it is installed on and must not be shared between axes.
    """
    return plt.FuncFormatter(lambda y, _: f'{y:.1f}%')


def _save_figure(fig: plt.Figure, output_path: Optional[str], description: str) -> None:
    """Save ``fig`` to ``output_path`` (if given) and print a confirmation.

    Args:
        fig: Figure to write.
        output_path: Destination; nothing happens when it is falsy.
        description: Human-readable name used in the confirmation message.
    """
    if not output_path:
        return
    # Only path-like destinations have a parent directory to create;
    # anything else is handed to savefig untouched.
    if isinstance(output_path, (str, Path)):
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    print(f"✓ Saved {description} to: {output_path}")


def _drawdown_percent(values: pd.Series) -> pd.Series:
    """Return the percentage decline of ``values`` from its running maximum."""
    running_max = values.cummax()
    return (values - running_max) / running_max * 100


def plot_cumulative_returns(
    results: Dict[str, pd.DataFrame],
    ticker: str,
    output_path: Optional[str] = None,
    figsize: tuple = (14, 8)
) -> plt.Figure:
    """
    Plot cumulative returns comparison for all strategies.

    The "cumulative_return" column is treated as a growth multiple
    (1.0 = break-even): it is converted with ``(x - 1) * 100`` before
    plotting. Portfolios without that column are skipped.

    Args:
        results: Dictionary mapping strategy name to portfolio DataFrame
        ticker: Stock ticker symbol
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object
    """
    fig, ax = plt.subplots(figsize=figsize)

    for name, portfolio in results.items():
        if "cumulative_return" not in portfolio.columns:
            continue
        cumulative_pct = (portfolio["cumulative_return"] - 1) * 100  # Convert to percentage
        ax.plot(portfolio.index, cumulative_pct, label=name, linewidth=2, alpha=0.8)

    ax.set_xlabel('Date', fontsize=12, fontweight='bold')
    ax.set_ylabel('Cumulative Return (%)', fontsize=12, fontweight='bold')
    ax.set_title(f'{ticker} - Cumulative Returns Comparison', fontsize=14, fontweight='bold')
    ax.legend(loc='best', fontsize=10, framealpha=0.9)
    ax.grid(True, alpha=0.3)
    ax.axhline(y=0, color='black', linestyle='--', linewidth=1, alpha=0.5)

    # Format y-axis as percentage
    ax.yaxis.set_major_formatter(_percent_formatter())

    fig.tight_layout()
    _save_figure(fig, output_path, "cumulative returns plot")

    return fig


def plot_transaction_history(
    portfolio: pd.DataFrame,
    ticker: str,
    strategy_name: str = "TradingAgents",
    output_path: Optional[str] = None,
    figsize: tuple = (14, 10)
) -> plt.Figure:
    """
    Plot transaction history with buy/sell signals overlaid on price chart.

    The upper panel shows the close price with a "Buy" marker on every row
    where ``signal`` becomes 1 and a "Sell" marker on every row where it
    becomes -1 (i.e. the previous row held a different value). The lower
    panel shows the portfolio value over time.
    NOTE(review): presumably 1 / -1 are the long / short positions of the
    long-short backtest; confirm against backtest.py.

    Args:
        portfolio: Portfolio DataFrame with 'signal', 'close' and
            'portfolio_value' columns
        ticker: Stock ticker symbol
        strategy_name: Name of the strategy
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object

    Raises:
        KeyError: If a required column is missing from ``portfolio``.
    """
    # Validate up front so a bad frame does not leave a half-built figure open.
    missing = [col for col in ("close", "signal", "portfolio_value")
               if col not in portfolio.columns]
    if missing:
        raise KeyError(f"portfolio is missing required column(s): {missing}")

    # gridspec_kw is accepted by every matplotlib release, unlike the
    # height_ratios shortcut keyword of plt.subplots.
    fig, (ax1, ax2) = plt.subplots(
        2, 1, figsize=figsize, gridspec_kw={'height_ratios': [2, 1]}
    )

    # Price chart with signals
    ax1.plot(portfolio.index, portfolio["close"], label='Close Price',
             color='blue', linewidth=1.5, alpha=0.7)

    # A marker is drawn only where the signal *changes* to 1 / -1. The first
    # row always counts as a change because its shifted value is NaN.
    signals = portfolio["signal"]
    previous = signals.shift(1)
    buy_signals = (signals == 1) & (previous != 1)
    sell_signals = (signals == -1) & (previous != -1)

    # Plot buy/sell markers
    if buy_signals.any():
        ax1.scatter(portfolio.index[buy_signals],
                    portfolio.loc[buy_signals, "close"],
                    marker='^', color='green', s=100, label='Buy',
                    zorder=5, alpha=0.8)

    if sell_signals.any():
        ax1.scatter(portfolio.index[sell_signals],
                    portfolio.loc[sell_signals, "close"],
                    marker='v', color='red', s=100, label='Sell',
                    zorder=5, alpha=0.8)

    ax1.set_ylabel('Price ($)', fontsize=12, fontweight='bold')
    ax1.set_title(f'{ticker} - {strategy_name} Transaction History',
                  fontsize=14, fontweight='bold')
    ax1.legend(loc='best', fontsize=10)
    ax1.grid(True, alpha=0.3)

    # Portfolio value
    ax2.plot(portfolio.index, portfolio["portfolio_value"],
             label='Portfolio Value', color='purple', linewidth=2)
    ax2.fill_between(portfolio.index, portfolio["portfolio_value"],
                     alpha=0.3, color='purple')
    ax2.set_xlabel('Date', fontsize=12, fontweight='bold')
    ax2.set_ylabel('Portfolio Value ($)', fontsize=12, fontweight='bold')
    ax2.legend(loc='best', fontsize=10)
    ax2.grid(True, alpha=0.3)

    # Format y-axis as currency
    ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'${y:,.0f}'))

    fig.tight_layout()
    _save_figure(fig, output_path, "transaction history plot")

    return fig


def plot_metrics_comparison(
    comparison_df: pd.DataFrame,
    ticker: str,
    output_path: Optional[str] = None,
    figsize: tuple = (16, 10)
) -> plt.Figure:
    """
    Create bar charts comparing key metrics across strategies.

    One horizontal bar chart is drawn per available metric on a fixed 2x2
    grid; panels without a metric are hidden. The row labelled
    'TradingAgents' is drawn in a distinct colour.

    Args:
        comparison_df: DataFrame with strategies as rows and metrics as columns
        ticker: Stock ticker symbol
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object

    Raises:
        ValueError: If none of the expected metric columns is present.
    """
    # Select key metrics (matching paper's Table 1)
    metrics_to_plot = [
        "Cumulative Return (%)",
        "Annualized Return (%)",
        "Sharpe Ratio",
        "Maximum Drawdown (%)"
    ]

    # Filter to available metrics
    available_metrics = [m for m in metrics_to_plot if m in comparison_df.columns]

    if not available_metrics:
        raise ValueError("No matching metrics found in comparison DataFrame")

    fig, axes = plt.subplots(2, 2, figsize=figsize)
    axes = axes.flatten()

    for ax, metric in zip(axes, available_metrics):
        data = comparison_df[metric].sort_values(ascending=False)
        positions = range(len(data))

        # Color code: TradingAgents in different color
        colors = ['#FF6B6B' if name == 'TradingAgents' else '#4ECDC4'
                  for name in data.index]

        bars = ax.barh(positions, data.values, color=colors, alpha=0.8)
        ax.set_yticks(positions)
        ax.set_yticklabels(data.index, fontsize=10)
        ax.set_xlabel(metric, fontsize=11, fontweight='bold')
        ax.set_title(metric, fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='x')

        # Add value labels on bars; ratios are unit-less, the rest are percentages
        is_ratio = "Ratio" in metric
        for bar, value in zip(bars, data.values):
            label = f'{value:.2f}' if is_ratio else f'{value:.1f}%'
            ax.text(value, bar.get_y() + bar.get_height() / 2,
                    f' {label}', va='center', fontsize=9)

    # Hide unused subplots
    for ax in axes[len(available_metrics):]:
        ax.axis('off')

    fig.suptitle(f'{ticker} - Performance Metrics Comparison',
                 fontsize=16, fontweight='bold', y=0.995)
    fig.tight_layout()
    _save_figure(fig, output_path, "metrics comparison plot")

    return fig


def plot_drawdown(
    results: Dict[str, pd.DataFrame],
    ticker: str,
    output_path: Optional[str] = None,
    figsize: tuple = (14, 8)
) -> plt.Figure:
    """
    Plot drawdown analysis for all strategies.

    Drawdown is the percentage decline of "portfolio_value" from its running
    maximum. Each strategy gets a line plus a faint fill down from zero in
    the same colour. Portfolios without a "portfolio_value" column are
    skipped.

    Args:
        results: Dictionary mapping strategy name to portfolio DataFrame
        ticker: Stock ticker symbol
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object
    """
    fig, ax = plt.subplots(figsize=figsize)

    for name, portfolio in results.items():
        if "portfolio_value" not in portfolio.columns:
            continue
        drawdown = _drawdown_percent(portfolio["portfolio_value"])
        (line,) = ax.plot(portfolio.index, drawdown, label=name, linewidth=2, alpha=0.7)
        # Tie the fill to the line's colour explicitly rather than relying on
        # two independent colour cycles staying in step.
        ax.fill_between(portfolio.index, drawdown, 0, alpha=0.1,
                        facecolor=line.get_color())

    ax.set_xlabel('Date', fontsize=12, fontweight='bold')
    ax.set_ylabel('Drawdown (%)', fontsize=12, fontweight='bold')
    ax.set_title(f'{ticker} - Drawdown Analysis', fontsize=14, fontweight='bold')
    ax.legend(loc='best', fontsize=10, framealpha=0.9)
    ax.grid(True, alpha=0.3)
    ax.axhline(y=0, color='black', linestyle='--', linewidth=1, alpha=0.5)

    # Format y-axis as percentage
    ax.yaxis.set_major_formatter(_percent_formatter())

    fig.tight_layout()
    _save_figure(fig, output_path, "drawdown plot")

    return fig


def plot_returns_distribution(
    results: Dict[str, pd.DataFrame],
    ticker: str,
    output_path: Optional[str] = None,
    figsize: tuple = (14, 8)
) -> plt.Figure:
    """
    Plot distribution of daily returns for all strategies.

    Draws one density-normalised, 50-bin histogram per strategy from the
    "strategy_return" column (scaled by 100). Portfolios without that column,
    or with no non-NaN returns, are skipped.

    Args:
        results: Dictionary mapping strategy name to portfolio DataFrame
        ticker: Stock ticker symbol
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object
    """
    fig, ax = plt.subplots(figsize=figsize)

    for name, portfolio in results.items():
        if "strategy_return" not in portfolio.columns:
            continue
        returns = portfolio["strategy_return"].dropna() * 100  # Convert to percentage
        if returns.empty:
            # Nothing to bin; skipping avoids a legend entry with no histogram.
            continue
        ax.hist(returns, bins=50, alpha=0.5, label=name, density=True)

    ax.set_xlabel('Daily Return (%)', fontsize=12, fontweight='bold')
    ax.set_ylabel('Density', fontsize=12, fontweight='bold')
    ax.set_title(f'{ticker} - Returns Distribution', fontsize=14, fontweight='bold')
    ax.legend(loc='best', fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.axvline(x=0, color='black', linestyle='--', linewidth=1, alpha=0.5)

    fig.tight_layout()
    _save_figure(fig, output_path, "returns distribution plot")

    return fig


def _render_plot(description: str, plot_fn: Callable[..., plt.Figure], *args, **kwargs) -> bool:
    """Run one plot function best-effort and release the figures it opened.

    Args:
        description: Plot name used in the failure message.
        plot_fn: Plot function to call with ``*args`` / ``**kwargs``.

    Returns:
        True if ``plot_fn`` completed, False if it raised (the error is
        printed, not propagated).
    """
    open_before = set(plt.get_fignums())
    try:
        plot_fn(*args, **kwargs)
    except Exception as e:  # deliberate: one broken plot must not abort the report
        print(f"✗ Failed to generate {description} plot: {e}")
        return False
    finally:
        # Close whatever this call opened (also on failure) so repeated
        # reports do not accumulate open figures in pyplot's registry.
        for num in set(plt.get_fignums()) - open_before:
            plt.close(num)
    return True


def create_summary_report(
    ticker: str,
    results: Dict[str, pd.DataFrame],
    comparison_df: pd.DataFrame,
    output_dir: str
) -> None:
    """
    Generate comprehensive visual summary report.
    Creates all standard plots and saves them to output directory.

    Each plot is generated independently: a failure is printed and the
    remaining plots are still attempted. Figures are closed once saved, so
    none of them stays open after this function returns. The transaction
    history plot is produced only when ``results`` has a "TradingAgents"
    entry.

    Args:
        ticker: Stock ticker symbol
        results: Dictionary mapping strategy name to portfolio DataFrame
        comparison_df: DataFrame with performance metrics comparison
        output_dir: Directory to save output files
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    def target(suffix: str) -> str:
        return str(output_path / f"{ticker}_{suffix}.png")

    print("\nGenerating visualizations...")

    outcomes = []

    # 1. Cumulative Returns
    outcomes.append(_render_plot(
        "cumulative returns", plot_cumulative_returns,
        results, ticker, output_path=target("cumulative_returns")
    ))

    # 2. Metrics Comparison
    outcomes.append(_render_plot(
        "metrics comparison", plot_metrics_comparison,
        comparison_df, ticker, output_path=target("metrics_comparison")
    ))

    # 3. Drawdown Analysis
    outcomes.append(_render_plot(
        "drawdown", plot_drawdown,
        results, ticker, output_path=target("drawdown")
    ))

    # 4. Transaction History (if TradingAgents results available)
    if "TradingAgents" in results:
        outcomes.append(_render_plot(
            "transaction history", plot_transaction_history,
            results["TradingAgents"], ticker,
            strategy_name="TradingAgents",
            output_path=target("TradingAgents_transactions")
        ))

    # 5. Returns Distribution
    outcomes.append(_render_plot(
        "returns distribution", plot_returns_distribution,
        results, ticker, output_path=target("returns_distribution")
    ))

    failed = outcomes.count(False)
    if failed:
        # Do not claim full success when some plots could not be produced.
        print(f"\n⚠ {failed} of {len(outcomes)} visualizations failed; "
              f"the rest were saved to: {output_dir}")
    else:
        print(f"\n✓ All visualizations saved to: {output_dir}")


def plot_cumulative_returns_from_results(
    results_dir: str,
    ticker: str,
    output_path: Optional[str] = None,
    figsize: tuple = (12, 7)
) -> plt.Figure:
    """
    Plot cumulative returns comparison from saved JSON results.

    For each known strategy folder under ``results_dir`` the first
    ``actions_*.json`` file (in sorted name order) is loaded. The file must
    hold an "actions" list whose items carry "date" and "cumulative_return".
    Values are plotted as stored, with a reference line at 1.0. Missing
    folders/files are skipped silently; unreadable ones print a warning.

    Args:
        results_dir: Directory containing strategy result folders
        ticker: Stock ticker symbol (used in the title only)
        output_path: Path to save the figure (optional)
        figsize: Figure size (width, height)

    Returns:
        matplotlib Figure object
    """
    results_path = Path(results_dir)

    # Folder name on disk -> label shown in the legend
    strategies = {
        'BuyAndHold': 'BuyAndHoldStrategy',
        'MACD': 'MACDStrategy',
        'KDJ&RSI': 'KDJRSIStrategy',
        'ZMR': 'ZMRStrategy',
        'SMA': 'SMAStrategy',
        'TradingAgents': 'TradingAgents'
    }

    fig, ax = plt.subplots(figsize=figsize)

    # Load and plot each strategy
    for folder_name, display_name in strategies.items():
        strategy_dir = results_path / folder_name
        if not strategy_dir.exists():
            continue

        # Sort so the chosen file does not depend on directory listing order.
        action_files = sorted(strategy_dir.glob("actions_*.json"))
        if not action_files:
            continue

        try:
            # Load data
            with open(action_files[0], 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Extract date and cumulative_return
            actions = data['actions']
            dates = pd.to_datetime([action['date'] for action in actions])
            cumulative_returns = [action['cumulative_return'] for action in actions]

            # Emphasise the TradingAgents curve with a thicker line
            linewidth = 2.5 if display_name == 'TradingAgents' else 1.5
            ax.plot(dates, cumulative_returns, label=display_name,
                    linewidth=linewidth, alpha=0.9)

        except Exception as e:  # deliberate: skip a bad file, keep plotting the rest
            print(f"Warning: Failed to load {display_name}: {e}")

    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Cumulative Return', fontsize=12)
    ax.set_title(f'Strategy Comparison - Cumulative Returns for {ticker}',
                 fontsize=14, fontweight='bold')
    ax.legend(title='Strategies', loc='best', fontsize=10, framealpha=0.9)
    ax.grid(True, alpha=0.3, linestyle='--')
    ax.axhline(y=1.0, color='black', linestyle='--', linewidth=1, alpha=0.5)

    fig.tight_layout()
    _save_figure(fig, output_path, "cumulative returns comparison")

    return fig


if __name__ == "__main__":
    # Example usage / testing
    print("Visualization module loaded successfully!")
    print("\nAvailable functions:")
    for function_name in (
        "plot_cumulative_returns",
        "plot_cumulative_returns_from_results",
        "plot_transaction_history",
        "plot_metrics_comparison",
        "plot_drawdown",
        "plot_returns_distribution",
        "create_summary_report",
    ):
        print(f"  - {function_name}")