From 7df8ae203f6ae4c43d03e094ef17080f91627e83 Mon Sep 17 00:00:00 2001
From: Zichen Liu <zichenliu@Zichens-MacBook-Air.local>
Date: Fri, 7 Nov 2025 16:59:55 -0600
Subject: [PATCH] change to longShort

---
 evaluation_long_short/__init__.py            |  66 +++
 evaluation_long_short/backtest.py            | 251 ++++++++++
 evaluation_long_short/baseline_strategies.py | 185 +++++++
 evaluation_long_short/metrics.py             | 116 +++++
 evaluation_long_short/run_evaluation.py      | 273 +++++++++++
 evaluation_long_short/visualize.py           | 480 +++++++++++++++++++
 6 files changed, 1371 insertions(+)
 create mode 100644 evaluation_long_short/__init__.py
 create mode 100644 evaluation_long_short/backtest.py
 create mode 100644 evaluation_long_short/baseline_strategies.py
 create mode 100644 evaluation_long_short/metrics.py
 create mode 100644 evaluation_long_short/run_evaluation.py
 create mode 100644 evaluation_long_short/visualize.py

diff --git a/evaluation_long_short/__init__.py b/evaluation_long_short/__init__.py
new file mode 100644
index 00000000..58590254
--- /dev/null
+++ b/evaluation_long_short/__init__.py
@@ -0,0 +1,66 @@
+from .baseline_strategies import (
+    BuyAndHoldStrategy,
+    MACDStrategy,
+    KDJRSIStrategy,
+    ZMRStrategy,
+    SMAStrategy,
+    get_all_baseline_strategies
+)
+
+from .metrics import (
+    calculate_cumulative_return,
+    calculate_annualized_return,
+    calculate_sharpe_ratio,
+    calculate_maximum_drawdown,
+    calculate_all_metrics,
+    create_comparison_table
+)
+
+from .backtest import (
+    BacktestEngine,
+    TradingAgentsBacktester,
+    load_stock_data
+)
+
+from .visualize import (
+    plot_cumulative_returns,
+    plot_transaction_history,
+    plot_metrics_comparison,
+    plot_drawdown,
+    create_summary_report
+)
+
+from .run_evaluation import run_evaluation
+
+__all__ = [
+    # Strategies
+    'BuyAndHoldStrategy',
+    'MACDStrategy',
+    'KDJRSIStrategy',
+    'ZMRStrategy',
+    'SMAStrategy',
+    'get_all_baseline_strategies',
+
+    # Metrics
+    'calculate_cumulative_return',
+    'calculate_annualized_return',
+    'calculate_sharpe_ratio',
+    'calculate_maximum_drawdown',
+    'calculate_all_metrics',
+    'create_comparison_table',
+
+    # Backtesting
+    'BacktestEngine',
+    'TradingAgentsBacktester',
+    'load_stock_data',
+
+    # Visualization
+    'plot_cumulative_returns',
+    'plot_transaction_history',
+    'plot_metrics_comparison',
+    'plot_drawdown',
+    'create_summary_report',
+
+    # Main evaluation
+    'run_evaluation',
+]
\ No newline at end of file
diff --git a/evaluation_long_short/backtest.py b/evaluation_long_short/backtest.py
new file mode 100644
index 00000000..77347c07
--- /dev/null
+++ b/evaluation_long_short/backtest.py
@@ -0,0 +1,251 @@
+"""
+Backtesting engine for TradingAgents and baseline strategies.
+
+Both TradingAgents and rule-based strategies use identical return calculation logic:
+    1. Generate signals/actions: 1 (BUY), 0 (HOLD), -1 (SELL)
+    2. Convert actions to positions: 1 (long), 0 (flat)
+    3. Calculate returns: strategy_return = position.shift(1) * market_return
+
+This ensures apples-to-apples comparison across all strategies.
+"""
+
+import pandas as pd
+import numpy as np
+from typing import Dict, List
+from pathlib import Path
+import json
+
+
+STD_FIELDS = {"Open", "High", "Low", "Close", "Adj Close", "Volume"}
+
+
+class TradingAgentsBacktester:
+    """Backtest engine for TradingAgents framework."""
+
+    def __init__(self, trading_agents_graph, initial_capital=100000, output_dir=None):
+        self.graph = trading_agents_graph
+        self.initial_capital = float(initial_capital)
+        self.name = "TradingAgents"
+        self.output_dir = output_dir
+
+    def backtest(self, ticker: str, start_date: str, end_date: str, data: pd.DataFrame) -> pd.DataFrame:
+        """
+        Backtest TradingAgents using the same return calculation logic as rule-based strategies.
+        
+        Process:
+        1. Collect signals (actions: 1=BUY, 0=HOLD, -1=SELL) for all dates
+        2. Convert actions to positions (0=flat, 1=long) using same logic as baselines
+        3. Calculate returns as: strategy_return = position.shift(1) * market_return
+        """
+        # Restrict to window
+        df = data.loc[start_date:end_date].copy()
+        
+        decisions: List[Dict] = []
+        signals = pd.Series(0, index=df.index, dtype=float)
+
+        print(f"\nRunning TradingAgents backtest on {ticker} from {start_date} to {end_date}")
+        print(f"Total trading days: {len(df)}")
+        print("-" * 80)
+
+        # Step 1: Collect all signals/decisions
+        for i, (date, row) in enumerate(df.iterrows()):
+            date_str = date.strftime("%Y-%m-%d")
+            price = float(row["Close"])
+
+            # Get decision from TradingAgents graph
+            try:
+                print(f"\n[{i+1}/{len(df)}] {date_str} ... ", end="")
+                final_state, decision = self.graph.propagate(ticker, date_str)
+                print(f"Decision: {decision}")
+                signal = self._parse_decision(decision)
+                decisions.append({"date": date_str, "decision": decision, "signal": signal, "price": price})
+
+            except Exception as e:
+                print(f"Error: {e}")
+                signal = 0
+                decisions.append({"date": date_str, "decision": "ERROR", "signal": 0, "price": price, "error": str(e)})
+
+            signals.loc[date] = signal
+
+        # Step 2: Convert actions to positions (same logic as baseline strategies)
+        position = self._actions_to_position(signals)
+        
+        # Step 3: Calculate returns using standardized logic
+        close = pd.to_numeric(df["Close"], errors="coerce")
+        market_ret = close.pct_change().fillna(0.0)
+        exposure = position.shift(1).fillna(0.0)  # Yesterday's position determines today's exposure
+        strat_ret = (exposure * market_ret).astype(float)
+        
+        cumret = (1.0 + strat_ret).cumprod()
+        portval = self.initial_capital * cumret
+        
+        # Build portfolio DataFrame with same structure as baseline strategies
+        portfolio = pd.DataFrame(index=df.index)
+        portfolio["action"] = signals                       # 1=BUY, 0=HOLD, -1=SELL
+        portfolio["position"] = position                    # 1=long, 0=flat
+        portfolio["close"] = close
+        if "Volume" in df.columns:
+            vol = df["Volume"]
+            if isinstance(vol, pd.DataFrame) and vol.shape[1] == 1:
+                vol = vol.iloc[:, 0]
+            if isinstance(vol, pd.Series):
+                portfolio["Volume"] = vol
+        portfolio["market_return"] = market_ret
+        portfolio["strategy_return"] = strat_ret
+        portfolio["cumulative_return"] = cumret
+        portfolio["portfolio_value"] = portval
+        portfolio["trade_delta"] = portfolio["position"].diff().fillna(0.0)  # +1=buy, -1=sell
+
+        self._save_decisions_log(ticker, decisions, start_date, end_date)
+        return portfolio
+
+    @staticmethod
+    def _actions_to_position(actions: pd.Series) -> pd.Series:
+        """Convert an action series into a long-only position series in {0, 1}.
+        Positive action -> long (1), negative -> flat (0), zero -> keep the previous position.
+        NOTE(review): BaseStrategy._actions_to_position in baseline_strategies.py sets the position
+        to -1 (short) on a negative action, so the two are not the same logic - confirm the intent."""
+        a = actions.astype(float).fillna(0.0).clip(-1, 1).values
+        pos = np.zeros_like(a, dtype=float)
+        for i in range(len(a)):
+            if i == 0:
+                pos[i] = 1.0 if a[i] > 0 else 0.0
+            else:
+                if a[i] > 0:       # buy → long
+                    pos[i] = 1.0
+                elif a[i] < 0:     # sell → flat
+                    pos[i] = 0.0
+                else:              # hold → keep previous
+                    pos[i] = pos[i-1]
+        return pd.Series(pos, index=actions.index, name="position")
+
+    def _parse_decision(self, decision: str) -> int:
+        """
+        Parse a decision into a signal by substring match on str(decision).upper().
+        Checked in this order:
+          - contains 'BUY' or 'LONG' -> 1
+          - contains 'SELL', 'EXIT' or 'CLOSE' -> -1  (we use -1 as 'close to cash' here)
+          - otherwise HOLD -> 0
+        """
+        d = str(decision).upper()
+        if "BUY" in d or "LONG" in d:
+            return 1
+        if "SELL" in d or "EXIT" in d or "CLOSE" in d:
+            return -1
+        return 0
+
+    def _save_decisions_log(self, ticker: str, decisions: List[Dict], start_date: str, end_date: str):
+        # Use output_dir if provided, otherwise use default
+        if self.output_dir:
+            out = Path(self.output_dir) / ticker / "TradingAgents"
+        else:
+            out = Path(f"eval_results/{ticker}/TradingAgents")
+        out.mkdir(parents=True, exist_ok=True)
+        fp = out / f"decisions_{start_date}_to_{end_date}.json"
+        with open(fp, "w") as f:
+            json.dump({
+                "strategy": "TradingAgents",
+                "ticker": ticker,
+                "start_date": start_date,
+                "end_date": end_date,
+                "total_days": len(decisions),
+                "decisions": decisions
+            }, f, indent=2)
+        print(f"  ✓ Saved TradingAgents detailed decisions to: {fp}")
+
+
+class BacktestEngine:
+    """Run strategies over one price DataFrame and collect each resulting portfolio in `results`."""
+
+    def __init__(self, data: pd.DataFrame, initial_capital: float = 100000):
+        self.data = data
+        self.initial_capital = float(initial_capital)
+        self.results: Dict[str, pd.DataFrame] = {}  # label -> portfolio DataFrame
+
+    def run_strategy(self, strategy, start_date: str = None, end_date: str = None, label = None) -> pd.DataFrame:
+        data_filtered = self.data.loc[(start_date or None):(end_date or None)]  # a missing bound leaves that side open
+        print(f"\nRunning {strategy.name}...")
+        portfolio = strategy.backtest(data_filtered)
+        self.results[label or strategy.name] = portfolio  # keyed by label, else by strategy.name
+        return portfolio
+
+    def run_all_strategies(self, strategies: Dict, start_date: str = None, end_date: str = None):
+        for name, strategy in strategies.items():
+            try:  # best effort: a failing strategy is reported and the remaining ones still run
+                self.run_strategy(strategy, start_date, end_date, label=name)  # store under the dict key
+                print(f"✓ {name} completed")
+            except Exception as e:
+                print(f"✗ {name} failed: {e}")
+
+    def get_results(self) -> Dict[str, pd.DataFrame]:
+        return self.results
+
+
+def load_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
+    try:
+        import yfinance as yf
+        # A ticker passed as single characters, e.g. ('A','A','P','L') or ['A','A','P','L'], is re-joined
+        if isinstance(ticker, (list, tuple)) and all(isinstance(c, str) and len(c) == 1 for c in ticker):
+            ticker = "".join(ticker)
+
+        if not isinstance(ticker, str):
+            raise ValueError("Pass a single ticker symbol as a string, e.g., 'AAPL'.")
+        # NOTE(review): yfinance documents `end` as exclusive, so end_date itself is presumably not downloaded - confirm
+        df = yf.download(ticker, start=start_date, end=end_date, progress=False)
+        if df.empty:
+            raise ValueError(f"No data found for {ticker}")
+        return df
+
+    except Exception as e:
+        print(f"Error loading data: {e}")
+        raise
+
+def standardize_single_ticker(df: pd.DataFrame, ticker: str | None = None) -> pd.DataFrame:
+    """Return a single-ticker OHLCV DataFrame with simple columns.
+       Works with yfinance single or multi-ticker outputs.
+    """
+    df = df.copy()
+
+    # If columns are MultiIndex (common with multi-ticker yfinance)
+    if isinstance(df.columns, pd.MultiIndex):
+        # Figure out which level is the field (Open/High/...) and which is ticker
+        lvl0 = set(map(str, df.columns.get_level_values(0)))
+        lvl1 = set(map(str, df.columns.get_level_values(1)))
+        if len(STD_FIELDS & lvl0) > 0:
+            field_level, ticker_level = 0, 1
+        elif len(STD_FIELDS & lvl1) > 0:
+            field_level, ticker_level = 1, 0
+        else:
+            raise ValueError("Cannot detect OHLCV field level in MultiIndex columns.")
+
+        available = list(pd.Index(df.columns.get_level_values(ticker_level)).unique())
+
+        # Normalize weird ticker inputs like ('A','A','P','L') -> 'AAPL'
+        if isinstance(ticker, (list, tuple)) and all(isinstance(c, str) and len(c) == 1 for c in ticker):
+            ticker = "".join(ticker)
+        if ticker is None:
+            if len(available) != 1:
+                raise ValueError(f"Multi-ticker DataFrame. Pick one with ticker=..., available={available}")
+            ticker = available[0]
+        if str(ticker) not in map(str, available):
+            raise ValueError(f"Ticker {ticker!r} not in columns. Available: {available}")
+
+        # Slice to that ticker and drop the ticker level
+        df = df.xs(ticker, axis=1, level=ticker_level)
+
+    # Map Adj Close -> Close if Close missing
+    if "Close" not in df.columns and "Adj Close" in df.columns:
+        df = df.rename(columns={"Adj Close": "Close"})
+
+    # Final sanity
+    req = ["Open", "High", "Low", "Close"]
+    missing = [c for c in req if c not in df.columns]
+    if missing:
+        raise ValueError(f"Data missing columns: {missing}")
+
+    # Ensure 'Close' is a Series (not 1-col DataFrame)
+    close = df["Close"]
+    if isinstance(close, pd.DataFrame) and close.shape[1] == 1:
+        df["Close"] = close.iloc[:, 0]
+
+    return df
\ No newline at end of file
diff --git a/evaluation_long_short/baseline_strategies.py b/evaluation_long_short/baseline_strategies.py
new file mode 100644
index 00000000..5e76fa30
--- /dev/null
+++ b/evaluation_long_short/baseline_strategies.py
@@ -0,0 +1,185 @@
+import pandas as pd
+import numpy as np
+from abc import ABC, abstractmethod
+
+
+class BaseStrategy(ABC):
+    """Base class for action-based trading strategies; positions are long (1), flat (0) or short (-1)."""
+
+    def __init__(self, initial_capital=100000):
+        self.initial_capital = float(initial_capital)
+        self.name = self.__class__.__name__
+
+    def _close_series(self, data: pd.DataFrame) -> pd.Series:
+        close = data["Close"]
+        if isinstance(close, pd.DataFrame):  # "Close" selected more than a plain column
+            if close.shape[1] == 1:
+                close = close.iloc[:, 0]
+            else:
+                raise ValueError("Multiple 'Close' columns detected. Pass single-ticker data.")
+        return pd.to_numeric(close, errors="coerce")  # unparseable values become NaN
+
+    @abstractmethod
+    def generate_signals(self, data: pd.DataFrame) -> pd.Series:
+        """
+        Generate *actions* by date:
+            1 = BUY  (_actions_to_position sets the position to +1, i.e. long)
+            0 = HOLD (the previous position is carried forward)
+           -1 = SELL (_actions_to_position sets the position to -1, i.e. short)
+        backtest() reindexes the result to the data index, fills gaps with 0 and clips to [-1, 1].
+        """
+        pass
+
+    def _prep_ohlcv(self, data: pd.DataFrame) -> pd.DataFrame:
+        req = ["Open", "High", "Low", "Close"]
+        for col in req:
+            if col not in data.columns:
+                raise ValueError(f"Data missing column '{col}'")
+        return data.copy()  # copy so the caller's frame is not touched
+
+    @staticmethod
+    def _actions_to_position(actions: pd.Series) -> pd.Series:
+        """Convert actions to positions in {-1,0,1}: a non-zero action sets the position, 0 keeps the previous."""
+        a = actions.astype(float).fillna(0.0).clip(-1, 1).values
+        pos = np.zeros_like(a, dtype=float)
+        for i in range(len(a)):
+            if i == 0:
+                pos[i] = a[i]  # first row: position equals the action itself
+            else:
+                if a[i] == 0:        # HOLD
+                    pos[i] = pos[i-1]
+                else:
+                    pos[i] = a[i]    # LONG or SHORT
+        return pd.Series(pos, index=actions.index, name="position")
+
+    def backtest(self, data: pd.DataFrame) -> pd.DataFrame:
+        df = self._prep_ohlcv(data)
+
+        # 1) get actions (1, 0, -1)
+        actions = self.generate_signals(df).reindex(df.index).fillna(0).clip(-1, 1).astype(float)
+
+        # 2) map actions → position {-1,0,1}
+        position = self._actions_to_position(actions)
+
+        # 3) compute returns (note: today's position earns the NEXT row's market return; -1 earns its negative)
+        close = self._close_series(df)
+        market_ret = close.pct_change().fillna(0.0)
+        exposure = position.shift(1).fillna(0.0)   # use yesterday's position
+        strat_ret = (exposure * market_ret).astype(float)
+
+        cumret = (1.0 + strat_ret).cumprod()
+        portval = self.initial_capital * cumret
+
+        portfolio = pd.DataFrame(index=df.index)
+        portfolio["action"] = actions                      # 1 buy / 0 hold / -1 sell
+        portfolio["position"] = position                   # 1 long / 0 flat / -1 short
+        portfolio["close"] = close
+        if "Volume" in df.columns:
+            vol = df["Volume"]
+            if isinstance(vol, pd.DataFrame) and vol.shape[1] == 1:
+                vol = vol.iloc[:, 0]
+            if isinstance(vol, pd.Series):
+                portfolio["Volume"] = vol
+        portfolio["market_return"] = market_ret
+        portfolio["strategy_return"] = strat_ret
+        portfolio["cumulative_return"] = cumret
+        portfolio["portfolio_value"] = portval
+        portfolio["trade_delta"] = portfolio["position"].diff().fillna(0.0)  # change in position vs previous row
+        return portfolio
+
+
+class BuyAndHoldStrategy(BaseStrategy):
+    """Emit a single BUY on the first row and HOLD on every later row."""
+
+    def generate_signals(self, data: pd.DataFrame) -> pd.Series:
+        actions = pd.Series(0.0, index=data.index)
+        if not actions.empty:
+            actions.iloc[0] = 1.0  # the only non-zero action
+        return actions
+
+
+class MACDStrategy(BaseStrategy):
+    """Contrarian MACD(12,26,9): MACD above its signal line → action -1 (SELL); below → action 1 (BUY)."""
+
+    def generate_signals(self, data):
+        df = data.copy()
+        ema_fast = df["Close"].ewm(span=12, adjust=False).mean()
+        ema_slow = df["Close"].ewm(span=26, adjust=False).mean()
+        macd = ema_fast - ema_slow
+        signal = macd.ewm(span=9, adjust=False).mean()
+        diff = macd - signal
+
+        a = pd.Series(0.0, index=df.index)
+        a[diff > 0] = -1.0   # SELL action; BaseStrategy._actions_to_position turns -1 into a short position
+        a[diff < 0] = 1.0    # BUY action (go long)
+        return a
+
+
+class KDJRSIStrategy(BaseStrategy):
+    """Contrarian KDJ + RSI: overbought on both → action -1 (SELL); oversold on both → action 1 (BUY)."""
+
+    def generate_signals(self, data):
+        df = data.copy()
+
+        # === RSI ===
+        delta = df["Close"].diff()
+        up, down = delta.clip(lower=0), -delta.clip(upper=0)
+        rs = up.ewm(span=14, adjust=False).mean() / down.ewm(span=14, adjust=False).mean().replace(0, np.nan)
+        df["rsi"] = 100 - 100 / (1 + rs)
+
+        # === KDJ ===
+        low = df["Low"].rolling(9).min()
+        high = df["High"].rolling(9).max()
+        denom = (high - low).replace(0, np.nan)
+        rsv = 100 * (df["Close"] - low) / denom
+        k = rsv.ewm(com=2, adjust=False).mean()
+        df["kdj_k"] = k
+
+        # === Actions ===
+        a = pd.Series(0.0, index=df.index)
+        # Tightened thresholds: RSI>75 and K>85 → SELL; RSI<25 and K<15 → BUY
+        a[(df["rsi"] > 75) & (df["kdj_k"] > 85)] = -1.0
+        a[(df["rsi"] < 25) & (df["kdj_k"] < 15)] = 1.0
+        return a
+
+
+class ZMRStrategy(BaseStrategy):
+
+    def generate_signals(self, data):
+        close = self._close_series(data)
+        mean = close.rolling(50).mean()
+        std = close.rolling(50).std()
+        z = (close - mean) / std.replace(0, np.nan)  # rolling z-score; a zero std becomes NaN, so no action fires
+
+        a = pd.Series(0.0, index=data.index)
+        a[z > 1.3] = -1.0   # overvalued → SELL action
+        a[z < -1.3] = 1.0   # undervalued → BUY action (go long)
+        return a
+
+
+class SMAStrategy(BaseStrategy):
+
+    def __init__(self, initial_capital=100000, short_window=5, long_window=20):
+        super().__init__(initial_capital)
+        self.short_window = int(short_window)  # rows in the fast moving average
+        self.long_window = int(long_window)    # rows in the slow moving average
+
+    def generate_signals(self, data: pd.DataFrame) -> pd.Series:
+        close = self._close_series(data)
+        short = close.rolling(window=self.short_window, min_periods=self.short_window).mean()
+        long_ = close.rolling(window=self.long_window, min_periods=self.long_window).mean()  # NaN until the window fills
+        a = pd.Series(0.0, index=data.index)
+        a[short > long_] = 1.0   # fast average above slow average → BUY action
+        a[short < long_] = -1.0  # fast average below slow average → SELL action
+        return a
+
+
+def get_all_baseline_strategies(initial_capital=100000):
+    """Return the baseline strategies keyed by display label, each constructed with `initial_capital`."""
+    return {
+        "BuyAndHold": BuyAndHoldStrategy(initial_capital),
+        "MACD": MACDStrategy(initial_capital),
+        "KDJ&RSI": KDJRSIStrategy(initial_capital),
+        "ZMR": ZMRStrategy(initial_capital),
+        "SMA": SMAStrategy(initial_capital),
+    }
diff --git a/evaluation_long_short/metrics.py b/evaluation_long_short/metrics.py
new file mode 100644
index 00000000..2f2c2b4f
--- /dev/null
+++ b/evaluation_long_short/metrics.py
@@ -0,0 +1,116 @@
+"""
+Evaluation metrics for trading strategies.
+Implements: Cumulative Return, Annualized Return, Sharpe Ratio, Maximum Drawdown
+"""
+
+import pandas as pd
+import numpy as np
+from typing import Dict
+
+
+def _require_cols(df: pd.DataFrame, cols):
+    absent = [name for name in cols if name not in df.columns]  # keeps the caller's order in the message
+    if absent:
+        raise ValueError(f"Portfolio missing columns: {absent}")
+
+
+def calculate_cumulative_return(portfolio: pd.DataFrame) -> float:
+    """Percent change from the first to the last portfolio_value; 0.0 when the first value is <= 0."""
+    _require_cols(portfolio, ["portfolio_value"])
+    values = portfolio["portfolio_value"]
+    first, last = float(values.iloc[0]), float(values.iloc[-1])
+    if first <= 0:
+        return 0.0
+    return (last / first - 1.0) * 100.0
+
+
+def calculate_annualized_return(portfolio: pd.DataFrame, trading_days: int | None = None) -> float:
+    """Annualized percent return compounded over trading_days / 252 years (default: the row count)."""
+    _require_cols(portfolio, ["portfolio_value"])
+    values = portfolio["portfolio_value"]
+    first, last = float(values.iloc[0]), float(values.iloc[-1])
+    if first <= 0 or last <= 0:
+        return 0.0  # no growth factor is computed for non-positive endpoints
+    n_days = len(portfolio) if trading_days is None else trading_days
+    years = n_days / 252.0
+    if years <= 0:
+        return 0.0
+    growth = last / first
+    return (growth ** (1.0 / years) - 1.0) * 100.0
+
+
+def calculate_sharpe_ratio(portfolio: pd.DataFrame, risk_free_rate: float = 0.02) -> float:
+    """
+    Annualized Sharpe ratio of the *daily* strategy returns (252 trading days, paper S1.2.3):
+    (mean * 252 - risk_free_rate) / (sample std * sqrt(252)); 0.0 with < 2 returns or zero std.
+    """
+    _require_cols(portfolio, ["strategy_return"])
+    daily = portfolio["strategy_return"].dropna().astype(float)
+    if len(daily) < 2 or daily.std() == 0:
+        return 0.0
+    annual_vol = daily.std(ddof=1) * np.sqrt(252.0)
+    if annual_vol == 0:
+        return 0.0
+    annual_mean = daily.mean() * 252.0
+    return (annual_mean - risk_free_rate) / annual_vol
+
+
+def calculate_maximum_drawdown(portfolio: pd.DataFrame) -> float:
+    """Deepest decline of portfolio_value below its running maximum, times 100 with the sign flipped."""
+    _require_cols(portfolio, ["portfolio_value"])
+    equity = portfolio["portfolio_value"].astype(float)
+    peak = equity.cummax()
+    deepest = ((equity - peak) / peak).min()
+    return float(deepest * -100.0)
+
+
+def calculate_win_rate(portfolio: pd.DataFrame) -> float:
+    """Percentage of non-NaN strategy_return rows that are strictly positive (0.0 when there are none)."""
+    _require_cols(portfolio, ["strategy_return"])
+    daily = portfolio["strategy_return"].dropna()
+    total = len(daily)
+    wins = (daily > 0).sum()
+    return 100.0 * wins / total if total else 0.0
+
+
+def calculate_profit_factor(portfolio: pd.DataFrame) -> float:
+    """Sum of positive daily returns divided by the absolute sum of negative ones (extra metric)."""
+    _require_cols(portfolio, ["strategy_return"])
+    daily = portfolio["strategy_return"].dropna()
+    gains = daily[daily > 0].sum()
+    losses = -daily[daily < 0].sum()
+    if losses != 0:
+        return float(gains / losses)
+    return float("inf") if gains > 0 else 0.0
+
+
+def calculate_all_metrics(portfolio: pd.DataFrame, risk_free_rate: float = 0.02) -> Dict[str, float]:
+    """Compute every metric for one portfolio, keyed by display name (four headline metrics, then extras)."""
+    metrics = {"Cumulative Return (%)": calculate_cumulative_return(portfolio)}
+    metrics["Annualized Return (%)"] = calculate_annualized_return(portfolio)
+    metrics["Sharpe Ratio"] = calculate_sharpe_ratio(portfolio, risk_free_rate)
+    metrics["Maximum Drawdown (%)"] = calculate_maximum_drawdown(portfolio)
+    # Extras (not in table but handy)
+    metrics["Win Rate (%)"] = calculate_win_rate(portfolio)
+    metrics["Profit Factor"] = calculate_profit_factor(portfolio)
+    return metrics
+
+
+def print_metrics(metrics: Dict[str, float], strategy_name: str = "Strategy"):
+    """Print a framed metric table; names containing 'Ratio' or 'Factor' get no '%' suffix."""
+    rule = "=" * 60
+    print(f"\n{rule}")
+    print(f"{strategy_name} Performance Metrics")
+    print(rule)
+    for label, value in metrics.items():
+        unit = "" if ("Ratio" in label or "Factor" in label) else "%"
+        print(f"{label:30s}: {value:8.2f}{unit}")
+    print(f"{rule}\n")
+
+
+def create_comparison_table(all_metrics: Dict[str, Dict[str, float]]) -> pd.DataFrame:
+    """One row per strategy, values rounded to 2 decimals, highest Sharpe Ratio first when that column exists."""
+    table = pd.DataFrame(all_metrics).T.round(2)
+    if "Sharpe Ratio" not in table.columns:
+        return table
+    return table.sort_values("Sharpe Ratio", ascending=False)
diff --git a/evaluation_long_short/run_evaluation.py b/evaluation_long_short/run_evaluation.py
new file mode 100644
index 00000000..63ea077e
--- /dev/null
+++ b/evaluation_long_short/run_evaluation.py
@@ -0,0 +1,273 @@
+"""
+Main evaluation script to run backtesting and generate results.
+Evaluates TradingAgents against baseline strategies for a single ticker.
+"""
+
+import argparse
+import sys
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import json
+
+# Add parent directory to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from evaluation_long_short.baseline_strategies import get_all_baseline_strategies
+from evaluation_long_short.backtest import BacktestEngine, TradingAgentsBacktester, load_stock_data, standardize_single_ticker
+from evaluation_long_short.metrics import calculate_all_metrics, create_comparison_table, print_metrics
+from evaluation_long_short.visualize import plot_cumulative_returns_from_results
+
+from tradingagents.graph.trading_graph import TradingAgentsGraph
+from tradingagents.default_config import DEFAULT_CONFIG
+
+def is_debugging() -> bool:
+    try:
+        import debugpy  # imported lazily so a missing debugpy is handled by the except below
+        return debugpy.is_client_connected()
+    except Exception:  # any failure (including the import) is reported as "not debugging"
+        return False
+
+
+def save_strategy_actions_to_json(
+    portfolio: pd.DataFrame, 
+    strategy_name: str, 
+    ticker: str, 
+    start_date: str, 
+    end_date: str,
+    output_dir: str
+) -> None:
+    """
+    Save daily actions from a strategy to <output_dir>/<ticker>/<strategy_name>/actions_<start>_to_<end>.json.
+    
+    Args:
+        portfolio: Portfolio DataFrame with action, position, close, portfolio_value, strategy_return, cumulative_return
+        strategy_name: Name of the strategy
+        ticker: Stock ticker symbol
+        start_date: Start date of backtest
+        end_date: End date of backtest
+        output_dir: Directory to save the JSON file
+    """
+    out = Path(output_dir) / ticker / strategy_name
+    out.mkdir(parents=True, exist_ok=True)
+    
+    # Build actions list with relevant daily info
+    actions = []
+    for date, row in portfolio.iterrows():
+        date_str = date.strftime("%Y-%m-%d")
+        action_record = {
+            "date": date_str,
+            "action": int(row["action"]) if pd.notna(row["action"]) else 0,  # 1=BUY, 0=HOLD, -1=SELL
+            "position": int(row["position"]) if pd.notna(row["position"]) else 0,  # 1=long, 0=flat, -1=short (baselines)
+            "close_price": float(row["close"]) if pd.notna(row["close"]) else None,
+            "portfolio_value": float(row["portfolio_value"]) if pd.notna(row["portfolio_value"]) else None,
+            "strategy_return": float(row["strategy_return"]) if pd.notna(row["strategy_return"]) else 0.0,
+            "cumulative_return": float(row["cumulative_return"]) if pd.notna(row["cumulative_return"]) else 1.0
+        }
+        actions.append(action_record)
+    
+    # Save to JSON
+    fp = out / f"actions_{start_date}_to_{end_date}.json"
+    with open(fp, "w") as f:
+        json.dump({
+            "strategy": strategy_name,
+            "ticker": ticker,
+            "start_date": start_date,
+            "end_date": end_date,
+            "total_days": len(actions),
+            "actions": actions
+        }, f, indent=2)
+    
+    print(f"  ✓ Saved {strategy_name} actions to: {fp}")
+
+
+def run_evaluation(
+    ticker: str,
+    start_date: str,
+    end_date: str,
+    initial_capital: float = 100000,
+    include_tradingagents: bool = True,
+    output_dir: str = None,
+    config: dict = None
+):
+    """
+    Run baselines (+ optionally TradingAgents) for one ticker; return (results, all_metrics), both keyed by strategy label.
+    """
+    print(f"\n{'='*80}")
+    print(f"EVALUATION: {ticker} from {start_date} to {end_date}")
+    print(f"Initial Capital: ${initial_capital:,.2f}")
+    print(f"{'='*80}\n")
+
+    # Output dir
+    if output_dir is None:
+        output_dir = f"eval_results/{ticker}/{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+    out = Path(output_dir)
+    out.mkdir(parents=True, exist_ok=True)
+
+    # Load data
+    print("\n" + "="*80)
+    print("STEP 1: Loading Stock Data")
+    print("="*80)
+    data = load_stock_data(ticker, start_date, end_date)
+    data = standardize_single_ticker(data, ticker)
+
+    # Backtest engine
+    engine = BacktestEngine(data, initial_capital)
+
+    # Baselines
+    print("\n" + "="*80)
+    print("STEP 2: Running Baseline Strategies")
+    print("="*80)
+    baselines = get_all_baseline_strategies(initial_capital)
+
+    for name, strategy in baselines.items():
+        try:
+            print(f"\nRunning {name}...", end=" ")
+            portfolio = engine.run_strategy(strategy, start_date, end_date, label=name)  # same key as the JSON dir
+            print("✓ Complete")
+            # Save actions to JSON
+            save_strategy_actions_to_json(portfolio, name, ticker, start_date, end_date, output_dir)
+        except Exception as e:
+            print(f"✗ Failed: {e}")
+
+    # TradingAgents
+    if include_tradingagents:
+        print("\n" + "="*80)
+        print("STEP 3: Running TradingAgents")
+        print("="*80)
+        try:
+            cfg = (config or DEFAULT_CONFIG).copy()
+            # Fast eval defaults (you can override from CLI)
+            cfg["deep_think_llm"] = cfg.get("deep_think_llm", "o4-mini")
+            cfg["quick_think_llm"] = cfg.get("quick_think_llm", "gpt-4o-mini")
+            cfg["max_debate_rounds"] = cfg.get("max_debate_rounds", 1)
+            cfg["max_risk_discuss_rounds"] = cfg.get("max_risk_discuss_rounds", 1)
+            # Fixed sampling params; a NEW dict, since .copy() is shallow. NOTE(review): temperature 0.7 is not deterministic - confirm
+            cfg["llm_params"] = {**cfg.get("llm_params", {}), "temperature": 0.7, "top_p": 1.0, "seed": 42}
+
+            print(f"\nInitializing TradingAgents...")
+            print(f"  Deep Thinking LLM: {cfg['deep_think_llm']}")
+            print(f"  Quick Thinking LLM: {cfg['quick_think_llm']}")
+            print(f"  Debate Rounds: {cfg['max_debate_rounds']}")
+
+            graph = TradingAgentsGraph(
+                selected_analysts=["market", "social", "news", "fundamentals"],
+                debug=False,
+                config=cfg
+            )
+            ta_backtester = TradingAgentsBacktester(graph, initial_capital, output_dir)
+            ta_portfolio = ta_backtester.backtest(ticker, start_date, end_date, data)
+
+            engine.results["TradingAgents"] = ta_portfolio
+            print("\n✓ TradingAgents backtest complete")
+            
+            # Save TradingAgents actions to JSON (in consistent format with baselines)
+            save_strategy_actions_to_json(ta_portfolio, "TradingAgents", ticker, start_date, end_date, output_dir)
+
+        except Exception as e:
+            print(f"\n✗ TradingAgents failed: {e}")
+            import traceback
+            traceback.print_exc()
+
+    # Metrics
+    print("\n" + "="*80)
+    print("STEP 4: Calculating Performance Metrics")
+    print("="*80)
+    all_metrics = {}
+    for name, portfolio in engine.results.items():
+        metrics = calculate_all_metrics(portfolio)
+        all_metrics[name] = metrics
+        print_metrics(metrics, name)
+
+    # Generate cumulative returns comparison plot
+    print("\n" + "="*80)
+    print("STEP 5: Generating Comparison Plot")
+    print("="*80)
+    try:
+        comparison_plot_path = str(out / ticker / "strategy_comparison.png")
+        plot_cumulative_returns_from_results(
+            results_dir=str(out / ticker),
+            ticker=ticker,
+            output_path=comparison_plot_path
+        )
+        # Also save as PDF
+        pdf_path = comparison_plot_path.replace('.png', '.pdf')
+        plot_cumulative_returns_from_results(
+            results_dir=str(out / ticker),
+            ticker=ticker,
+            output_path=pdf_path
+        )
+        print(f"\n✓ Comparison plot saved to:")
+        print(f"  - {comparison_plot_path}")
+        print(f"  - {pdf_path}")
+    except Exception as e:
+        print(f"\n✗ Failed to generate comparison plot: {e}")
+        import traceback
+        traceback.print_exc()
+
+    print("\n" + "="*80)
+    print("EVALUATION COMPLETE")
+    print("="*80)
+    print(f"\nResults saved to: {out}")
+    print(f"\nDaily actions JSON files saved for:")
+    for name in engine.results.keys():
+        print(f"  ✓ {name}")
+
+    return engine.results, all_metrics
+
+
+def main():
+    """Parse CLI arguments, build the run config, and call ``run_evaluation``."""
+    parser = argparse.ArgumentParser(description="Run TradingAgents evaluation with baseline comparisons")
+    # A ticker is mandatory for a CLI run; reject its absence at parse time instead of passing None on.
+    parser.add_argument("--ticker", type=str, required=True, help="Stock ticker symbol (e.g., AAPL)")
+    parser.add_argument("--start-date", type=str, required=True, help="Start date (YYYY-MM-DD)")
+    parser.add_argument("--end-date", type=str, required=True, help="End date (YYYY-MM-DD)")
+    parser.add_argument("--capital", type=float, default=100000, help="Initial capital (default: 100000)")
+    parser.add_argument("--skip-tradingagents", action="store_true", help="Skip TradingAgents evaluation")
+    parser.add_argument("--output-dir", type=str, default=None, help="Output directory for results")
+    parser.add_argument("--deep-llm", type=str, default="o4-mini", help="Deep thinking LLM model")
+    parser.add_argument("--quick-llm", type=str, default="gpt-4o-mini", help="Quick thinking LLM model")
+    parser.add_argument("--debate-rounds", type=int, default=1, help="Number of debate rounds (default: 1)")
+
+    if is_debugging():
+        # Debug sessions skip argv parsing and run a short fixed AAPL scenario.
+        config = DEFAULT_CONFIG.copy()
+        config.update({
+            "deep_think_llm": "o4-mini",
+            "quick_think_llm": "gpt-4o-mini",
+            "max_debate_rounds": 1,
+            "max_risk_discuss_rounds": 1,
+            "llm_params": {"temperature": 0.7, "top_p": 1.0, "seed": 42},
+        })
+        run_evaluation(
+            ticker="AAPL",
+            start_date="2024-01-01",
+            end_date="2024-01-10",
+            initial_capital=1000,
+            include_tradingagents=True,
+            output_dir="./evaluation_long_short/results",
+            config=config
+        )
+        return
+
+    args = parser.parse_args()
+    config = DEFAULT_CONFIG.copy()
+    config["deep_think_llm"] = args.deep_llm
+    config["quick_think_llm"] = args.quick_llm
+    config["max_debate_rounds"] = args.debate_rounds
+    config["max_risk_discuss_rounds"] = args.debate_rounds
+    # .copy() is shallow: build a fresh llm_params dict rather than updating one DEFAULT_CONFIG may share.
+    config["llm_params"] = {**config.get("llm_params", {}), "temperature": 0, "top_p": 1.0, "seed": 42}
+    run_evaluation(
+        ticker=args.ticker,
+        start_date=args.start_date,
+        end_date=args.end_date,
+        initial_capital=args.capital,
+        include_tradingagents=not args.skip_tradingagents,
+        output_dir=args.output_dir,
+        config=config
+    )
+
+if __name__ == "__main__":  # run the CLI only when executed as a script
+    main()
\ No newline at end of file
diff --git a/evaluation_long_short/visualize.py b/evaluation_long_short/visualize.py
new file mode 100644
index 00000000..416fa89a
--- /dev/null
+++ b/evaluation_long_short/visualize.py
@@ -0,0 +1,480 @@
+"""
+Visualization tools for trading strategy evaluation.
+Generates plots and reports for comparing TradingAgents with baseline strategies.
+"""
+
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from pathlib import Path
+from typing import Dict
+import warnings
+import json
+
+warnings.filterwarnings('ignore')
+
+# Optional seaborn styling; OSError covers Matplotlib builds lacking this style.
+try:
+    import seaborn as sns
+    plt.style.use('seaborn-v0_8-darkgrid')
+    sns.set_palette("husl")
+    HAS_SEABORN = True
+except (ImportError, OSError):
+    HAS_SEABORN = False
+    # Fall back to plain Matplotlib: white canvas with a grid.
+    plt.rcParams['figure.facecolor'] = 'white'
+    plt.rcParams['axes.facecolor'] = 'white'
+    plt.rcParams['axes.grid'] = True
+
+
+def plot_cumulative_returns(
+    results: Dict[str, pd.DataFrame],
+    ticker: str,
+    output_path: str = None,
+    figsize: tuple = (14, 8)
+) -> plt.Figure:
+    """
+    Draw one cumulative-return curve per strategy on a shared axis.
+
+    Strategies whose DataFrame has no ``cumulative_return`` column are skipped.
+    Values are shown as percentage gain, i.e. ``(cumulative_return - 1) * 100``.
+
+    Args:
+        results: Strategy name -> portfolio DataFrame
+        ticker: Symbol used in the chart title
+        output_path: If given, the figure is written there at 300 dpi
+        figsize: Figure size (width, height)
+
+    Returns:
+        The matplotlib Figure that was drawn
+    """
+    bold = {'fontsize': 12, 'fontweight': 'bold'}
+    fig, ax = plt.subplots(figsize=figsize)
+
+    plottable = {k: df for k, df in results.items() if "cumulative_return" in df.columns}
+    for strategy, frame in plottable.items():
+        pct_gain = (frame["cumulative_return"] - 1) * 100
+        ax.plot(frame.index, pct_gain, label=strategy, linewidth=2, alpha=0.8)
+
+    ax.axhline(y=0, color='black', linestyle='--', linewidth=1, alpha=0.5)
+    ax.grid(True, alpha=0.3)
+    ax.set_xlabel('Date', **bold)
+    ax.set_ylabel('Cumulative Return (%)', **bold)
+    ax.set_title(f'{ticker} - Cumulative Returns Comparison', fontsize=14, fontweight='bold')
+    ax.legend(loc='best', fontsize=10, framealpha=0.9)
+    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda val, _pos: f'{val:.1f}%'))
+    plt.tight_layout()
+
+    if output_path:
+        fig.savefig(output_path, dpi=300, bbox_inches='tight')
+        print(f"✓ Saved cumulative returns plot to: {output_path}")
+    return fig
+
+
+def plot_transaction_history(
+    portfolio: pd.DataFrame,
+    ticker: str,
+    strategy_name: str = "TradingAgents",
+    output_path: str = None,
+    figsize: tuple = (14, 10)
+) -> plt.Figure:
+    """
+    Plot the close price with signal markers above the portfolio value curve.
+
+    A marker is drawn on the first bar of each consecutive run of a signal:
+    ``signal == 1`` is labelled "Buy" and ``signal == -1`` is labelled "Sell".
+
+    Args:
+        portfolio: DataFrame with 'signal', 'close' and 'portfolio_value' columns
+        ticker: Stock ticker symbol
+        strategy_name: Name of the strategy (used in the title)
+        output_path: Path to save the figure (optional)
+        figsize: Figure size (width, height)
+
+    Returns:
+        matplotlib Figure object
+    """
+    # gridspec_kw is used instead of the height_ratios shortcut because the
+    # shortcut keyword only exists in Matplotlib 3.6 and later.
+    fig, (ax1, ax2) = plt.subplots(
+        2, 1, figsize=figsize, gridspec_kw={'height_ratios': [2, 1]}
+    )
+
+    # Price chart with signals
+    ax1.plot(portfolio.index, portfolio["close"], label='Close Price',
+             color='blue', linewidth=1.5, alpha=0.7)
+
+    # Mark only transitions into a state, not every bar spent in it.
+    signals = portfolio["signal"]
+    previous = signals.shift(1)
+    markers = (
+        ((signals == 1) & (previous != 1), '^', 'green', 'Buy'),
+        ((signals == -1) & (previous != -1), 'v', 'red', 'Sell'),
+    )
+    for mask, marker, color, label in markers:
+        if mask.any():
+            ax1.scatter(portfolio.index[mask], portfolio.loc[mask, "close"],
+                        marker=marker, color=color, s=100, label=label,
+                        zorder=5, alpha=0.8)
+
+    ax1.set_ylabel('Price ($)', fontsize=12, fontweight='bold')
+    ax1.set_title(f'{ticker} - {strategy_name} Transaction History',
+                  fontsize=14, fontweight='bold')
+    ax1.legend(loc='best', fontsize=10)
+    ax1.grid(True, alpha=0.3)
+
+    # Portfolio value
+    ax2.plot(portfolio.index, portfolio["portfolio_value"],
+             label='Portfolio Value', color='purple', linewidth=2)
+    ax2.fill_between(portfolio.index, portfolio["portfolio_value"],
+                      alpha=0.3, color='purple')
+    ax2.set_xlabel('Date', fontsize=12, fontweight='bold')
+    ax2.set_ylabel('Portfolio Value ($)', fontsize=12, fontweight='bold')
+    ax2.legend(loc='best', fontsize=10)
+    ax2.grid(True, alpha=0.3)
+
+    # Format y-axis as currency
+    ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'${y:,.0f}'))
+    plt.tight_layout()
+
+    if output_path:
+        fig.savefig(output_path, dpi=300, bbox_inches='tight')
+        print(f"✓ Saved transaction history plot to: {output_path}")
+    return fig
+
+
+def plot_metrics_comparison(
+    comparison_df: pd.DataFrame,
+    ticker: str,
+    output_path: str = None,
+    figsize: tuple = (16, 10)
+) -> plt.Figure:
+    """
+    Render a 2x2 grid of horizontal bar charts, one panel per headline metric.
+
+    Only metrics present as columns of ``comparison_df`` are drawn; panels left
+    without a metric are hidden. Rows are sorted by descending value before
+    plotting and the 'TradingAgents' row gets its own colour.
+
+    Args:
+        comparison_df: Strategies as rows (index), metrics as columns
+        ticker: Symbol used in the figure title
+        output_path: If given, the figure is written there at 300 dpi
+        figsize: Figure size (width, height)
+
+    Returns:
+        The matplotlib Figure that was drawn
+
+    Raises:
+        ValueError: If none of the expected metric columns is present
+    """
+    highlight, regular = '#FF6B6B', '#4ECDC4'
+    # Headline metrics, in panel order
+    wanted = (
+        "Cumulative Return (%)",
+        "Annualized Return (%)",
+        "Sharpe Ratio",
+        "Maximum Drawdown (%)",
+    )
+
+    present = [col for col in wanted if col in comparison_df.columns]
+    if not present:
+        raise ValueError("No matching metrics found in comparison DataFrame")
+
+    fig, grid = plt.subplots(2, 2, figsize=figsize)
+    panels = grid.flatten()
+
+    for panel, metric in zip(panels, present):
+        ranked = comparison_df[metric].sort_values(ascending=False)
+        positions = range(len(ranked))
+        palette = [highlight if who == 'TradingAgents' else regular for who in ranked.index]
+
+        rects = panel.barh(positions, ranked.values, color=palette, alpha=0.8)
+        panel.set_yticks(positions)
+        panel.set_yticklabels(ranked.index, fontsize=10)
+        panel.set_xlabel(metric, fontsize=11, fontweight='bold')
+        panel.set_title(metric, fontsize=12, fontweight='bold')
+        panel.grid(True, alpha=0.3, axis='x')
+
+        # Ratios are printed with two decimals, everything else as a percentage
+        is_ratio = "Ratio" in metric
+        for rect, amount in zip(rects, ranked.values):
+            caption = f'{amount:.2f}' if is_ratio else f'{amount:.1f}%'
+            mid_y = rect.get_y() + rect.get_height()/2
+            panel.text(amount, mid_y, f'  {caption}', va='center', fontsize=9)
+
+    # Blank out panels that received no metric
+    for spare in panels[len(present):4]:
+        spare.axis('off')
+
+    fig.suptitle(f'{ticker} - Performance Metrics Comparison',
+                 fontsize=16, fontweight='bold', y=0.995)
+    plt.tight_layout()
+
+    if output_path:
+        fig.savefig(output_path, dpi=300, bbox_inches='tight')
+        print(f"✓ Saved metrics comparison plot to: {output_path}")
+
+    return fig
+
+
+def plot_drawdown(
+    results: Dict[str, pd.DataFrame],
+    ticker: str,
+    output_path: str = None,
+    figsize: tuple = (14, 8)
+) -> plt.Figure:
+    """
+    Plot drawdown analysis for all strategies.
+
+    Drawdown is the percentage distance of ``portfolio_value`` from its
+    running maximum (0% whenever the value sits at its running high).
+    Strategies without a ``portfolio_value`` column are skipped.
+
+    Args:
+        results: Dictionary mapping strategy name to portfolio DataFrame
+        ticker: Stock ticker symbol
+        output_path: Path to save the figure (optional)
+        figsize: Figure size (width, height)
+
+    Returns:
+        matplotlib Figure object
+    """
+    fig, ax = plt.subplots(figsize=figsize)
+
+    for name, portfolio in results.items():
+        if "portfolio_value" not in portfolio.columns:
+            continue
+        values = portfolio["portfolio_value"]
+        running_max = values.cummax()
+        drawdown = (values - running_max) / running_max * 100
+        # Compute the series once, then shade with the curve's own colour so
+        # line and fill always match.
+        (line,) = ax.plot(portfolio.index, drawdown, label=name, linewidth=2, alpha=0.7)
+        ax.fill_between(portfolio.index, drawdown, 0, alpha=0.1, facecolor=line.get_color())
+
+    ax.set_xlabel('Date', fontsize=12, fontweight='bold')
+    ax.set_ylabel('Drawdown (%)', fontsize=12, fontweight='bold')
+    ax.set_title(f'{ticker} - Drawdown Analysis', fontsize=14, fontweight='bold')
+    ax.legend(loc='best', fontsize=10, framealpha=0.9)
+    ax.grid(True, alpha=0.3)
+    ax.axhline(y=0, color='black', linestyle='--', linewidth=1, alpha=0.5)
+
+    # Format y-axis as percentage
+    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y:.1f}%'))
+
+    plt.tight_layout()
+
+    if output_path:
+        fig.savefig(output_path, dpi=300, bbox_inches='tight')
+        print(f"✓ Saved drawdown plot to: {output_path}")
+
+    return fig
+
+
+def plot_returns_distribution(
+    results: Dict[str, pd.DataFrame],
+    ticker: str,
+    output_path: str = None,
+    figsize: tuple = (14, 8)
+) -> plt.Figure:
+    """
+    Overlay density histograms of each strategy's daily returns.
+
+    Args:
+        results: Strategy name -> portfolio DataFrame; frames lacking a
+            ``strategy_return`` column are skipped
+        ticker: Symbol used in the chart title
+        output_path: If given, the figure is written there at 300 dpi
+        figsize: Figure size (width, height)
+
+    Returns:
+        The matplotlib Figure that was drawn
+    """
+    fig, ax = plt.subplots(figsize=figsize)
+
+    for strategy, frame in results.items():
+        if "strategy_return" not in frame.columns:
+            continue
+        daily_pct = frame["strategy_return"].dropna() * 100
+        ax.hist(daily_pct, bins=50, alpha=0.5, label=strategy, density=True)
+
+    ax.set_title(f'{ticker} - Returns Distribution', fontsize=14, fontweight='bold')
+    ax.set_xlabel('Daily Return (%)', fontsize=12, fontweight='bold')
+    ax.set_ylabel('Density', fontsize=12, fontweight='bold')
+    ax.legend(loc='best', fontsize=10)
+    ax.grid(True, alpha=0.3)
+    ax.axvline(x=0, color='black', linestyle='--', linewidth=1, alpha=0.5)
+    plt.tight_layout()
+
+    if output_path:
+        fig.savefig(output_path, dpi=300, bbox_inches='tight')
+        print(f"✓ Saved returns distribution plot to: {output_path}")
+    return fig
+
+
+def create_summary_report(
+    ticker: str,
+    results: Dict[str, pd.DataFrame],
+    comparison_df: pd.DataFrame,
+    output_dir: str
+) -> None:
+    """
+    Generate comprehensive visual summary report.
+    Creates all standard plots and saves them to output directory.
+
+    Each plot is attempted independently: a failure is reported on stdout and
+    the remaining plots are still produced. Figures opened by a plot are
+    closed afterwards, so repeated calls do not accumulate open figures.
+
+    Args:
+        ticker: Stock ticker symbol
+        results: Dictionary mapping strategy name to portfolio DataFrame
+        comparison_df: DataFrame with performance metrics comparison
+        output_dir: Directory to save output files (created if missing)
+    """
+    output_path = Path(output_dir)
+    output_path.mkdir(parents=True, exist_ok=True)
+
+    print("\nGenerating visualizations...")
+
+    # Each job: (label used in error messages, output file name, renderer taking the target path)
+    jobs = [
+        (
+            "cumulative returns",
+            f"{ticker}_cumulative_returns.png",
+            lambda path: plot_cumulative_returns(results, ticker, output_path=path),
+        ),
+        (
+            "metrics comparison",
+            f"{ticker}_metrics_comparison.png",
+            lambda path: plot_metrics_comparison(comparison_df, ticker, output_path=path),
+        ),
+        (
+            "drawdown",
+            f"{ticker}_drawdown.png",
+            lambda path: plot_drawdown(results, ticker, output_path=path),
+        ),
+    ]
+    # Transaction history is only drawn when TradingAgents results are available.
+    if "TradingAgents" in results:
+        jobs.append((
+            "transaction history",
+            f"{ticker}_TradingAgents_transactions.png",
+            lambda path: plot_transaction_history(
+                results["TradingAgents"],
+                ticker,
+                strategy_name="TradingAgents",
+                output_path=path,
+            ),
+        ))
+    jobs.append((
+        "returns distribution",
+        f"{ticker}_returns_distribution.png",
+        lambda path: plot_returns_distribution(results, ticker, output_path=path),
+    ))
+
+    # Render each plot on its own so one failure does not stop the rest.
+    for label, filename, render in jobs:
+        open_before = set(plt.get_fignums())
+        try:
+            render(str(output_path / filename))
+        except Exception as e:
+            print(f"✗ Failed to generate {label} plot: {e}")
+        finally:
+            # Close whatever this plot opened, even if it failed part-way.
+            for num in set(plt.get_fignums()) - open_before:
+                plt.close(num)
+
+    print(f"\n✓ All visualizations saved to: {output_dir}")
+
+
+def plot_cumulative_returns_from_results(
+    results_dir: str,
+    ticker: str,
+    output_path: str = None,
+    figsize: tuple = (12, 7)
+) -> plt.Figure:
+    """
+    Plot cumulative returns comparison from saved JSON results.
+
+    For every known strategy folder under ``results_dir`` the most recently
+    written ``actions_*.json`` file is read and its per-action
+    ``cumulative_return`` values are plotted against ``date``. Missing folders
+    or files are skipped; a file that fails to load only prints a warning.
+
+    Args:
+        results_dir: Directory containing strategy result folders
+        ticker: Stock ticker symbol
+        output_path: Path to save the figure (optional)
+        figsize: Figure size (width, height)
+
+    Returns:
+        matplotlib Figure object
+    """
+    results_path = Path(results_dir)
+
+    # Folder name under results_dir -> label shown in the legend
+    strategies = {
+        'BuyAndHold': 'BuyAndHoldStrategy',
+        'MACD': 'MACDStrategy',
+        'KDJ&RSI': 'KDJRSIStrategy',
+        'ZMR': 'ZMRStrategy',
+        'SMA': 'SMAStrategy',
+        'TradingAgents': 'TradingAgents'
+    }
+
+    fig, ax = plt.subplots(figsize=figsize)
+
+    # Load and plot each strategy
+    for folder_name, display_name in strategies.items():
+        strategy_dir = results_path / folder_name
+        if not strategy_dir.exists():
+            continue
+        action_files = list(strategy_dir.glob("actions_*.json"))
+        if not action_files:
+            continue
+
+        try:
+            # glob order is arbitrary; when several runs left files behind,
+            # use the most recently written one instead of whichever came first.
+            latest = max(action_files, key=lambda p: p.stat().st_mtime)
+            with open(latest, 'r') as f:
+                data = json.load(f)
+            dates = pd.to_datetime([action['date'] for action in data['actions']])
+            cumulative_returns = [action['cumulative_return'] for action in data['actions']]
+            linewidth = 2.5 if display_name == 'TradingAgents' else 1.5
+            ax.plot(dates, cumulative_returns, label=display_name,
+                   linewidth=linewidth, alpha=0.9)
+        except Exception as e:
+            print(f"Warning: Failed to load {display_name}: {e}")
+
+    ax.set_xlabel('Date', fontsize=12)
+    ax.set_ylabel('Cumulative Return', fontsize=12)
+    ax.set_title(f'Strategy Comparison - Cumulative Returns for {ticker}',
+                 fontsize=14, fontweight='bold')
+    ax.legend(title='Strategies', loc='best', fontsize=10, framealpha=0.9)
+    ax.grid(True, alpha=0.3, linestyle='--')
+    ax.axhline(y=1.0, color='black', linestyle='--', linewidth=1, alpha=0.5)
+
+    plt.tight_layout()
+
+    if output_path:
+        fig.savefig(output_path, dpi=300, bbox_inches='tight')
+        print(f"✓ Saved cumulative returns comparison to: {output_path}")
+
+    return fig
+
+
+if __name__ == "__main__":
+    # Running this file directly only confirms that it imports cleanly and
+    # lists the plotting helpers it exposes.
+    print("Visualization module loaded successfully!")
+    print("\nAvailable functions:")
+    for helper_name in (
+        "plot_cumulative_returns", "plot_cumulative_returns_from_results",
+        "plot_transaction_history", "plot_metrics_comparison",
+        "plot_drawdown", "plot_returns_distribution", "create_summary_report",
+    ):
+        print(f"  - {helper_name}")
+