From ffff3050c896d7e96ed60e04c74148f3aa226fc0 Mon Sep 17 00:00:00 2001 From: Quanliang Liu Date: Thu, 6 Nov 2025 16:57:23 -0600 Subject: [PATCH] Modified evaluation logic for tradingagent to align with rule-based methods --- evaluation/backtest.py | 141 ++++++++++++++++----------- evaluation/run_evaluation.py | 97 ++++++++++++------ requirements.txt | 1 + tradingagents/default_config.py | 9 ++ tradingagents/graph/trading_graph.py | 12 ++- 5 files changed, 172 insertions(+), 88 deletions(-) diff --git a/evaluation/backtest.py b/evaluation/backtest.py index 5a6f12c9..77347c07 100644 --- a/evaluation/backtest.py +++ b/evaluation/backtest.py @@ -1,8 +1,16 @@ """ Backtesting engine for TradingAgents and baseline strategies. + +Both TradingAgents and rule-based strategies use identical return calculation logic: + 1. Generate signals/actions: 1 (BUY), 0 (HOLD), -1 (SELL) + 2. Convert actions to positions: 1 (long), 0 (flat) + 3. Calculate returns: strategy_return = position.shift(1) * market_return + +This ensures apples-to-apples comparison across all strategies. """ import pandas as pd +import numpy as np from typing import Dict, List from pathlib import Path import json @@ -14,36 +22,37 @@ STD_FIELDS = {"Open", "High", "Low", "Close", "Adj Close", "Volume"} class TradingAgentsBacktester: """Backtest engine for TradingAgents framework.""" - def __init__(self, trading_agents_graph, initial_capital=100000): + def __init__(self, trading_agents_graph, initial_capital=100000, output_dir=None): self.graph = trading_agents_graph self.initial_capital = float(initial_capital) self.name = "TradingAgents" + self.output_dir = output_dir def backtest(self, ticker: str, start_date: str, end_date: str, data: pd.DataFrame) -> pd.DataFrame: + """ + Backtest TradingAgents using the same return calculation logic as rule-based strategies. + + Process: + 1. Collect signals (actions: 1=BUY, 0=HOLD, -1=SELL) for all dates + 2. Convert actions to positions (0=flat, 1=long) using same logic as baselines + 3. Calculate returns as: strategy_return = position.shift(1) * market_return + """ # Restrict to window df = data.loc[start_date:end_date].copy() - portfolio = pd.DataFrame(index=df.index) - portfolio["close"] = df["Close"] - if "Volume" in df.columns: - portfolio["Volume"] = df["Volume"] - - portfolio["signal"] = 0 - portfolio["position"] = 0.0 - portfolio["cash"] = self.initial_capital - portfolio["shares"] = 0.0 - portfolio["portfolio_value"] = self.initial_capital - + decisions: List[Dict] = [] + signals = pd.Series(0, index=df.index, dtype=float) print(f"\nRunning TradingAgents backtest on {ticker} from {start_date} to {end_date}") print(f"Total trading days: {len(df)}") print("-" * 80) + # Step 1: Collect all signals/decisions for i, (date, row) in enumerate(df.iterrows()): date_str = date.strftime("%Y-%m-%d") price = float(row["Close"]) - # Get decision + # Get decision from TradingAgents graph try: print(f"\n[{i+1}/{len(df)}] {date_str} ... ", end="") final_state, decision = self.graph.propagate(ticker, date_str) @@ -56,51 +65,60 @@ class TradingAgentsBacktester: signal = 0 decisions.append({"date": date_str, "decision": "ERROR", "signal": 0, "price": price, "error": str(e)}) - # Previous day state - if i > 0: - prev_cash = float(portfolio["cash"].iloc[i - 1]) - prev_shares = float(portfolio["shares"].iloc[i - 1]) - prev_pos = float(portfolio["position"].iloc[i - 1]) - else: - prev_cash = self.initial_capital - prev_shares = 0.0 - prev_pos = 0.0 + signals.loc[date] = signal - cash, shares, position = prev_cash, prev_shares, prev_pos - - # Execute: BUY opens/keeps long with all cash; SELL closes to cash; HOLD keeps. - if signal == 1 and prev_pos <= 0: - # Go long full notional - shares = cash / price if price > 0 else 0.0 - cash = 0.0 - position = 1.0 - elif signal == -1 and prev_pos > 0: - # Exit long to cash (no shorting here; paper's figs show short arrows, - # but transactions table is still long/flat in our public code) - cash = shares * price - shares = 0.0 - position = 0.0 - else: - # Hold current stance - position = prev_pos - - portval = cash + shares * price - - portfolio.loc[date, "signal"] = signal - portfolio.loc[date, "position"] = position - portfolio.loc[date, "cash"] = cash - portfolio.loc[date, "shares"] = shares - portfolio.loc[date, "portfolio_value"] = portval - - # Returns - portfolio["market_return"] = portfolio["close"].pct_change().fillna(0.0) - portfolio["portfolio_return"] = portfolio["portfolio_value"].pct_change().fillna(0.0) - portfolio["strategy_return"] = portfolio["portfolio_return"] - portfolio["cumulative_return"] = (1.0 + portfolio["strategy_return"]).cumprod() + # Step 2: Convert actions to positions (same logic as baseline strategies) + position = self._actions_to_position(signals) + + # Step 3: Calculate returns using standardized logic + close = pd.to_numeric(df["Close"], errors="coerce") + market_ret = close.pct_change().fillna(0.0) + exposure = position.shift(1).fillna(0.0) # Yesterday's position determines today's exposure + strat_ret = (exposure * market_ret).astype(float) + + cumret = (1.0 + strat_ret).cumprod() + portval = self.initial_capital * cumret + + # Build portfolio DataFrame with same structure as baseline strategies + portfolio = pd.DataFrame(index=df.index) + portfolio["action"] = signals # 1=BUY, 0=HOLD, -1=SELL + portfolio["position"] = position # 1=long, 0=flat + portfolio["close"] = close + if "Volume" in df.columns: + vol = df["Volume"] + if isinstance(vol, pd.DataFrame) and vol.shape[1] == 1: + vol = vol.iloc[:, 0] + if isinstance(vol, pd.Series): + portfolio["Volume"] = vol + portfolio["market_return"] = market_ret + portfolio["strategy_return"] = strat_ret + portfolio["cumulative_return"] = cumret + portfolio["portfolio_value"] = portval + portfolio["trade_delta"] = portfolio["position"].diff().fillna(0.0) # +1=buy, -1=sell self._save_decisions_log(ticker, decisions, start_date, end_date) return portfolio + @staticmethod + def _actions_to_position(actions: pd.Series) -> pd.Series: + """ + Convert action series to a long-only position series in {0,1}. + Same logic as baseline strategies for consistency. + """ + a = actions.astype(float).fillna(0.0).clip(-1, 1).values + pos = np.zeros_like(a, dtype=float) + for i in range(len(a)): + if i == 0: + pos[i] = 1.0 if a[i] > 0 else 0.0 + else: + if a[i] > 0: # buy → long + pos[i] = 1.0 + elif a[i] < 0: # sell → flat + pos[i] = 0.0 + else: # hold → keep previous + pos[i] = pos[i-1] + return pd.Series(pos, index=actions.index, name="position") + def _parse_decision(self, decision: str) -> int: """ Parse decision to signal. @@ -117,12 +135,23 @@ class TradingAgentsBacktester: return 0 def _save_decisions_log(self, ticker: str, decisions: List[Dict], start_date: str, end_date: str): - out = Path(f"eval_results/{ticker}/TradingAgents_backtest") + # Use output_dir if provided, otherwise use default + if self.output_dir: + out = Path(self.output_dir) / ticker / "TradingAgents" + else: + out = Path(f"eval_results/{ticker}/TradingAgents") out.mkdir(parents=True, exist_ok=True) fp = out / f"decisions_{start_date}_to_{end_date}.json" with open(fp, "w") as f: - json.dump(decisions, f, indent=2) - print(f"\nDecisions log saved to: {fp}") + json.dump({ + "strategy": "TradingAgents", + "ticker": ticker, + "start_date": start_date, + "end_date": end_date, + "total_days": len(decisions), + "decisions": decisions + }, f, indent=2) + print(f" ✓ Saved TradingAgents detailed decisions to: {fp}") class BacktestEngine: diff --git a/evaluation/run_evaluation.py b/evaluation/run_evaluation.py index 070799b2..7e579bbe 100644 --- a/evaluation/run_evaluation.py +++ b/evaluation/run_evaluation.py @@ -8,6 +8,7 @@ import sys from pathlib import Path from datetime import datetime import pandas as pd +import json # Add parent directory to path sys.path.insert(0, str(Path(__file__).parent.parent)) @@ -15,7 +16,6 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) from evaluation.baseline_strategies import get_all_baseline_strategies from evaluation.backtest import BacktestEngine, TradingAgentsBacktester, load_stock_data, standardize_single_ticker from evaluation.metrics import calculate_all_metrics, create_comparison_table, print_metrics -from evaluation.visualize import create_summary_report from tradingagents.graph.trading_graph import TradingAgentsGraph from tradingagents.default_config import DEFAULT_CONFIG @@ -28,6 +28,58 @@ def is_debugging() -> bool: return False +def save_strategy_actions_to_json( + portfolio: pd.DataFrame, + strategy_name: str, + ticker: str, + start_date: str, + end_date: str, + output_dir: str +) -> None: + """ + Save daily actions from a strategy to a JSON file. + + Args: + portfolio: Portfolio DataFrame with action, position, close, etc. + strategy_name: Name of the strategy + ticker: Stock ticker symbol + start_date: Start date of backtest + end_date: End date of backtest + output_dir: Directory to save the JSON file + """ + out = Path(output_dir) / ticker / strategy_name + out.mkdir(parents=True, exist_ok=True) + + # Build actions list with relevant daily info + actions = [] + for date, row in portfolio.iterrows(): + date_str = date.strftime("%Y-%m-%d") + action_record = { + "date": date_str, + "action": int(row["action"]) if pd.notna(row["action"]) else 0, # 1=BUY, 0=HOLD, -1=SELL + "position": int(row["position"]) if pd.notna(row["position"]) else 0, # 1=long, 0=flat + "close_price": float(row["close"]) if pd.notna(row["close"]) else None, + "portfolio_value": float(row["portfolio_value"]) if pd.notna(row["portfolio_value"]) else None, + "strategy_return": float(row["strategy_return"]) if pd.notna(row["strategy_return"]) else 0.0, + "cumulative_return": float(row["cumulative_return"]) if pd.notna(row["cumulative_return"]) else 1.0 + } + actions.append(action_record) + + # Save to JSON + fp = out / f"actions_{start_date}_to_{end_date}.json" + with open(fp, "w") as f: + json.dump({ + "strategy": strategy_name, + "ticker": ticker, + "start_date": start_date, + "end_date": end_date, + "total_days": len(actions), + "actions": actions + }, f, indent=2) + + print(f" ✓ Saved {strategy_name} actions to: {fp}") + + def run_evaluation( ticker: str, start_date: str, @@ -72,6 +124,8 @@ def run_evaluation( print(f"\nRunning {name}...", end=" ") portfolio = engine.run_strategy(strategy, start_date, end_date) print("✓ Complete") + # Save actions to JSON + save_strategy_actions_to_json(portfolio, name, ticker, start_date, end_date, output_dir) except Exception as e: print(f"✗ Failed: {e}") @@ -100,11 +154,14 @@ def run_evaluation( debug=False, config=cfg ) - ta_backtester = TradingAgentsBacktester(graph, initial_capital) + ta_backtester = TradingAgentsBacktester(graph, initial_capital, output_dir) ta_portfolio = ta_backtester.backtest(ticker, start_date, end_date, data) engine.results["TradingAgents"] = ta_portfolio print("\n✓ TradingAgents backtest complete") + + # Save TradingAgents actions to JSON (in consistent format with baselines) + save_strategy_actions_to_json(ta_portfolio, "TradingAgents", ticker, start_date, end_date, output_dir) except Exception as e: print(f"\n✗ TradingAgents failed: {e}") @@ -121,35 +178,15 @@ def run_evaluation( all_metrics[name] = metrics print_metrics(metrics, name) - comparison_df = create_comparison_table(all_metrics) - - print("\n" + "="*80) - print("PERFORMANCE COMPARISON TABLE") - print("="*80) - print(comparison_df.to_string()) - print("\n") - - comparison_df.to_csv(out / f"{ticker}_comparison.csv") - print(f"Comparison table saved to: {out / f'{ticker}_comparison.csv'}") - - # Visuals - print("\n" + "="*80) - print("STEP 5: Generating Visualizations") - print("="*80) - create_summary_report(ticker, engine.results, comparison_df, output_dir) - print("\n" + "="*80) print("EVALUATION COMPLETE") print("="*80) print(f"\nResults saved to: {out}") - print(f" - Comparison table: {ticker}_comparison.csv") - print(f" - Cumulative returns plot: {ticker}_cumulative_returns.png") - print(f" - Metrics comparison: {ticker}_metrics_comparison.png") - if include_tradingagents and "TradingAgents" in engine.results: - print(f" - Transaction history: {ticker}_TradingAgents_transactions.png") - print(f" - Drawdown analysis: {ticker}_drawdown.png") + print(f"\nDaily actions JSON files saved for:") + for name in engine.results.keys(): + print(f" ✓ {name}") - return engine.results, comparison_df + return engine.results, all_metrics def main(): @@ -158,10 +195,10 @@ def main(): parser.add_argument("--start-date", type=str, required=True, help="Start date (YYYY-MM-DD)") parser.add_argument("--end-date", type=str, required=True, help="End date (YYYY-MM-DD)") parser.add_argument("--capital", type=float, default=100000, help="Initial capital (default: 100000)") - parser.add_argument("--no-tradingagents", action="store_true", help="Skip TradingAgents") + parser.add_argument("--skip-tradingagents", action="store_true", help="Skip TradingAgents evaluation") parser.add_argument("--output-dir", type=str, default=None, help="Output directory for results") - parser.add_argument("--deep-llm", type=str, default="gpt-4o-mini", help="Deep thinking LLM model") - parser.add_argument("--quick-llm", type=str, default="gpt-5-nano", help="Quick thinking LLM model") + parser.add_argument("--deep-llm", type=str, default="o4-mini", help="Deep thinking LLM model") + parser.add_argument("--quick-llm", type=str, default="gpt-4o-mini", help="Quick thinking LLM model") parser.add_argument("--debate-rounds", type=int, default=1, help="Number of debate rounds (default: 1)") # Used for debugging @@ -200,7 +237,7 @@ def main(): start_date=args.start_date, end_date=args.end_date, initial_capital=args.capital, - include_tradingagents=not args.no_tradingagents, + include_tradingagents=not args.skip_tradingagents, output_dir=args.output_dir, config=config ) diff --git a/requirements.txt b/requirements.txt index 75bf1027..aed93944 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,3 +27,4 @@ langchain-google-genai matplotlib seaborn numpy +python-dotenv diff --git a/tradingagents/default_config.py b/tradingagents/default_config.py index 1f40a2a2..61331251 100644 --- a/tradingagents/default_config.py +++ b/tradingagents/default_config.py @@ -1,4 +1,12 @@ import os +from pathlib import Path +from dotenv import load_dotenv + +# Load environment variables from .env file +# Look for .env in the project root (parent of tradingagents directory) +project_root = Path(__file__).parent.parent +dotenv_path = project_root / ".env" +load_dotenv(dotenv_path=dotenv_path) DEFAULT_CONFIG = { "project_dir": os.path.abspath(os.path.join(os.path.dirname(__file__), ".")), @@ -13,6 +21,7 @@ DEFAULT_CONFIG = { "deep_think_llm": "o4-mini", "quick_think_llm": "gpt-4o-mini", "backend_url": "https://api.openai.com/v1", + "openai_api_key": os.getenv("OPENAI_API_KEY"), # Load from .env file # Debate and discussion settings "max_debate_rounds": 1, "max_risk_discuss_rounds": 1, diff --git a/tradingagents/graph/trading_graph.py b/tradingagents/graph/trading_graph.py index 40cdff75..01ab3fee 100644 --- a/tradingagents/graph/trading_graph.py +++ b/tradingagents/graph/trading_graph.py @@ -73,8 +73,16 @@ class TradingAgentsGraph: # Initialize LLMs if self.config["llm_provider"].lower() == "openai" or self.config["llm_provider"] == "ollama" or self.config["llm_provider"] == "openrouter": - self.deep_thinking_llm = ChatOpenAI(model=self.config["deep_think_llm"], base_url=self.config["backend_url"]) - self.quick_thinking_llm = ChatOpenAI(model=self.config["quick_think_llm"], base_url=self.config["backend_url"]) + self.deep_thinking_llm = ChatOpenAI( + model=self.config["deep_think_llm"], + base_url=self.config["backend_url"], + api_key=self.config.get("openai_api_key") + ) + self.quick_thinking_llm = ChatOpenAI( + model=self.config["quick_think_llm"], + base_url=self.config["backend_url"], + api_key=self.config.get("openai_api_key") + ) elif self.config["llm_provider"].lower() == "anthropic": self.deep_thinking_llm = ChatAnthropic(model=self.config["deep_think_llm"], base_url=self.config["backend_url"]) self.quick_thinking_llm = ChatAnthropic(model=self.config["quick_think_llm"], base_url=self.config["backend_url"])