Modified evaluation logic for tradingagent to align with rule-based methods

This commit is contained in:
Quanliang Liu 2025-11-06 16:57:23 -06:00
parent e42a7edea3
commit ffff3050c8
5 changed files with 172 additions and 88 deletions

View File

@ -1,8 +1,16 @@
""" """
Backtesting engine for TradingAgents and baseline strategies. Backtesting engine for TradingAgents and baseline strategies.
Both TradingAgents and rule-based strategies use identical return calculation logic:
1. Generate signals/actions: 1 (BUY), 0 (HOLD), -1 (SELL)
2. Convert actions to positions: 1 (long), 0 (flat)
3. Calculate returns: strategy_return = position.shift(1) * market_return
This ensures apples-to-apples comparison across all strategies.
""" """
import pandas as pd import pandas as pd
import numpy as np
from typing import Dict, List from typing import Dict, List
from pathlib import Path from pathlib import Path
import json import json
@ -14,36 +22,37 @@ STD_FIELDS = {"Open", "High", "Low", "Close", "Adj Close", "Volume"}
class TradingAgentsBacktester: class TradingAgentsBacktester:
"""Backtest engine for TradingAgents framework.""" """Backtest engine for TradingAgents framework."""
def __init__(self, trading_agents_graph, initial_capital=100000): def __init__(self, trading_agents_graph, initial_capital=100000, output_dir=None):
self.graph = trading_agents_graph self.graph = trading_agents_graph
self.initial_capital = float(initial_capital) self.initial_capital = float(initial_capital)
self.name = "TradingAgents" self.name = "TradingAgents"
self.output_dir = output_dir
def backtest(self, ticker: str, start_date: str, end_date: str, data: pd.DataFrame) -> pd.DataFrame: def backtest(self, ticker: str, start_date: str, end_date: str, data: pd.DataFrame) -> pd.DataFrame:
"""
Backtest TradingAgents using the same return calculation logic as rule-based strategies.
Process:
1. Collect signals (actions: 1=BUY, 0=HOLD, -1=SELL) for all dates
2. Convert actions to positions (0=flat, 1=long) using same logic as baselines
3. Calculate returns as: strategy_return = position.shift(1) * market_return
"""
# Restrict to window # Restrict to window
df = data.loc[start_date:end_date].copy() df = data.loc[start_date:end_date].copy()
portfolio = pd.DataFrame(index=df.index)
portfolio["close"] = df["Close"]
if "Volume" in df.columns:
portfolio["Volume"] = df["Volume"]
portfolio["signal"] = 0
portfolio["position"] = 0.0
portfolio["cash"] = self.initial_capital
portfolio["shares"] = 0.0
portfolio["portfolio_value"] = self.initial_capital
decisions: List[Dict] = [] decisions: List[Dict] = []
signals = pd.Series(0, index=df.index, dtype=float)
print(f"\nRunning TradingAgents backtest on {ticker} from {start_date} to {end_date}") print(f"\nRunning TradingAgents backtest on {ticker} from {start_date} to {end_date}")
print(f"Total trading days: {len(df)}") print(f"Total trading days: {len(df)}")
print("-" * 80) print("-" * 80)
# Step 1: Collect all signals/decisions
for i, (date, row) in enumerate(df.iterrows()): for i, (date, row) in enumerate(df.iterrows()):
date_str = date.strftime("%Y-%m-%d") date_str = date.strftime("%Y-%m-%d")
price = float(row["Close"]) price = float(row["Close"])
# Get decision # Get decision from TradingAgents graph
try: try:
print(f"\n[{i+1}/{len(df)}] {date_str} ... ", end="") print(f"\n[{i+1}/{len(df)}] {date_str} ... ", end="")
final_state, decision = self.graph.propagate(ticker, date_str) final_state, decision = self.graph.propagate(ticker, date_str)
@ -56,51 +65,60 @@ class TradingAgentsBacktester:
signal = 0 signal = 0
decisions.append({"date": date_str, "decision": "ERROR", "signal": 0, "price": price, "error": str(e)}) decisions.append({"date": date_str, "decision": "ERROR", "signal": 0, "price": price, "error": str(e)})
# Previous day state signals.loc[date] = signal
if i > 0:
prev_cash = float(portfolio["cash"].iloc[i - 1])
prev_shares = float(portfolio["shares"].iloc[i - 1])
prev_pos = float(portfolio["position"].iloc[i - 1])
else:
prev_cash = self.initial_capital
prev_shares = 0.0
prev_pos = 0.0
cash, shares, position = prev_cash, prev_shares, prev_pos # Step 2: Convert actions to positions (same logic as baseline strategies)
position = self._actions_to_position(signals)
# Execute: BUY opens/keeps long with all cash; SELL closes to cash; HOLD keeps.
if signal == 1 and prev_pos <= 0: # Step 3: Calculate returns using standardized logic
# Go long full notional close = pd.to_numeric(df["Close"], errors="coerce")
shares = cash / price if price > 0 else 0.0 market_ret = close.pct_change().fillna(0.0)
cash = 0.0 exposure = position.shift(1).fillna(0.0) # Yesterday's position determines today's exposure
position = 1.0 strat_ret = (exposure * market_ret).astype(float)
elif signal == -1 and prev_pos > 0:
# Exit long to cash (no shorting here; paper's figs show short arrows, cumret = (1.0 + strat_ret).cumprod()
# but transactions table is still long/flat in our public code) portval = self.initial_capital * cumret
cash = shares * price
shares = 0.0 # Build portfolio DataFrame with same structure as baseline strategies
position = 0.0 portfolio = pd.DataFrame(index=df.index)
else: portfolio["action"] = signals # 1=BUY, 0=HOLD, -1=SELL
# Hold current stance portfolio["position"] = position # 1=long, 0=flat
position = prev_pos portfolio["close"] = close
if "Volume" in df.columns:
portval = cash + shares * price vol = df["Volume"]
if isinstance(vol, pd.DataFrame) and vol.shape[1] == 1:
portfolio.loc[date, "signal"] = signal vol = vol.iloc[:, 0]
portfolio.loc[date, "position"] = position if isinstance(vol, pd.Series):
portfolio.loc[date, "cash"] = cash portfolio["Volume"] = vol
portfolio.loc[date, "shares"] = shares portfolio["market_return"] = market_ret
portfolio.loc[date, "portfolio_value"] = portval portfolio["strategy_return"] = strat_ret
portfolio["cumulative_return"] = cumret
# Returns portfolio["portfolio_value"] = portval
portfolio["market_return"] = portfolio["close"].pct_change().fillna(0.0) portfolio["trade_delta"] = portfolio["position"].diff().fillna(0.0) # +1=buy, -1=sell
portfolio["portfolio_return"] = portfolio["portfolio_value"].pct_change().fillna(0.0)
portfolio["strategy_return"] = portfolio["portfolio_return"]
portfolio["cumulative_return"] = (1.0 + portfolio["strategy_return"]).cumprod()
self._save_decisions_log(ticker, decisions, start_date, end_date) self._save_decisions_log(ticker, decisions, start_date, end_date)
return portfolio return portfolio
@staticmethod
def _actions_to_position(actions: pd.Series) -> pd.Series:
    """
    Convert an action series to a long-only position series in {0, 1}.

    Same logic as baseline strategies for consistency: a positive action
    (BUY) opens or keeps a long position, a negative action (SELL) goes
    flat, and a zero or missing action (HOLD) carries the previous
    position forward. The position before the first BUY/SELL is flat.

    Args:
        actions: Per-row actions; values must be castable to float.
            Only the sign of each value matters.

    Returns:
        Float series named "position" on the same index as ``actions``.
    """
    a = actions.astype(float).fillna(0.0)
    # Blank out HOLD rows, then forward-fill so every row sees the most
    # recent BUY/SELL. Rows before the first trade stay NaN, and NaN > 0 is
    # False, which leaves them flat.
    last_trade = a.where(a != 0.0).ffill()
    return (last_trade > 0.0).astype(float).rename("position")
def _parse_decision(self, decision: str) -> int: def _parse_decision(self, decision: str) -> int:
""" """
Parse decision to signal. Parse decision to signal.
@ -117,12 +135,23 @@ class TradingAgentsBacktester:
return 0 return 0
def _save_decisions_log(self, ticker: str, decisions: List[Dict], start_date: str, end_date: str): def _save_decisions_log(self, ticker: str, decisions: List[Dict], start_date: str, end_date: str):
out = Path(f"eval_results/{ticker}/TradingAgents_backtest") # Use output_dir if provided, otherwise use default
if self.output_dir:
out = Path(self.output_dir) / ticker / "TradingAgents"
else:
out = Path(f"eval_results/{ticker}/TradingAgents")
out.mkdir(parents=True, exist_ok=True) out.mkdir(parents=True, exist_ok=True)
fp = out / f"decisions_{start_date}_to_{end_date}.json" fp = out / f"decisions_{start_date}_to_{end_date}.json"
with open(fp, "w") as f: with open(fp, "w") as f:
json.dump(decisions, f, indent=2) json.dump({
print(f"\nDecisions log saved to: {fp}") "strategy": "TradingAgents",
"ticker": ticker,
"start_date": start_date,
"end_date": end_date,
"total_days": len(decisions),
"decisions": decisions
}, f, indent=2)
print(f" ✓ Saved TradingAgents detailed decisions to: {fp}")
class BacktestEngine: class BacktestEngine:

View File

@ -8,6 +8,7 @@ import sys
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
import pandas as pd import pandas as pd
import json
# Add parent directory to path # Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent)) sys.path.insert(0, str(Path(__file__).parent.parent))
@ -15,7 +16,6 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from evaluation.baseline_strategies import get_all_baseline_strategies from evaluation.baseline_strategies import get_all_baseline_strategies
from evaluation.backtest import BacktestEngine, TradingAgentsBacktester, load_stock_data, standardize_single_ticker from evaluation.backtest import BacktestEngine, TradingAgentsBacktester, load_stock_data, standardize_single_ticker
from evaluation.metrics import calculate_all_metrics, create_comparison_table, print_metrics from evaluation.metrics import calculate_all_metrics, create_comparison_table, print_metrics
from evaluation.visualize import create_summary_report
from tradingagents.graph.trading_graph import TradingAgentsGraph from tradingagents.graph.trading_graph import TradingAgentsGraph
from tradingagents.default_config import DEFAULT_CONFIG from tradingagents.default_config import DEFAULT_CONFIG
@ -28,6 +28,58 @@ def is_debugging() -> bool:
return False return False
def save_strategy_actions_to_json(
    portfolio: pd.DataFrame,
    strategy_name: str,
    ticker: str,
    start_date: str,
    end_date: str,
    output_dir: str
) -> None:
    """
    Save daily actions from a strategy to a JSON file.

    The file is written to
    ``<output_dir>/<ticker>/<strategy_name>/actions_<start_date>_to_<end_date>.json``;
    missing directories are created.

    Args:
        portfolio: Portfolio DataFrame indexed by date, with the columns
            ``action``, ``position``, ``close``, ``portfolio_value``,
            ``strategy_return`` and ``cumulative_return``.
        strategy_name: Name of the strategy
        ticker: Stock ticker symbol
        start_date: Start date of backtest
        end_date: End date of backtest
        output_dir: Directory to save the JSON file

    Raises:
        KeyError: If ``portfolio`` lacks one of the required columns.
    """
    def _cell(row, column, cast, default):
        # Missing values (NaN/None) are replaced by the column's default.
        value = row[column]
        return cast(value) if pd.notna(value) else default

    out = Path(output_dir) / ticker / strategy_name
    out.mkdir(parents=True, exist_ok=True)

    # Build actions list with relevant daily info
    actions = [
        {
            # pd.Timestamp() accepts Timestamps, datetimes, dates and ISO
            # strings, so a non-datetime index no longer breaks strftime.
            "date": pd.Timestamp(date).strftime("%Y-%m-%d"),
            "action": _cell(row, "action", int, 0),  # 1=BUY, 0=HOLD, -1=SELL
            "position": _cell(row, "position", int, 0),  # 1=long, 0=flat
            "close_price": _cell(row, "close", float, None),
            "portfolio_value": _cell(row, "portfolio_value", float, None),
            "strategy_return": _cell(row, "strategy_return", float, 0.0),
            "cumulative_return": _cell(row, "cumulative_return", float, 1.0),
        }
        for date, row in portfolio.iterrows()
    ]

    # Save to JSON
    fp = out / f"actions_{start_date}_to_{end_date}.json"
    with open(fp, "w", encoding="utf-8") as f:
        json.dump({
            "strategy": strategy_name,
            "ticker": ticker,
            "start_date": start_date,
            "end_date": end_date,
            "total_days": len(actions),
            "actions": actions
        }, f, indent=2)
    print(f" ✓ Saved {strategy_name} actions to: {fp}")
def run_evaluation( def run_evaluation(
ticker: str, ticker: str,
start_date: str, start_date: str,
@ -72,6 +124,8 @@ def run_evaluation(
print(f"\nRunning {name}...", end=" ") print(f"\nRunning {name}...", end=" ")
portfolio = engine.run_strategy(strategy, start_date, end_date) portfolio = engine.run_strategy(strategy, start_date, end_date)
print("✓ Complete") print("✓ Complete")
# Save actions to JSON
save_strategy_actions_to_json(portfolio, name, ticker, start_date, end_date, output_dir)
except Exception as e: except Exception as e:
print(f"✗ Failed: {e}") print(f"✗ Failed: {e}")
@ -100,11 +154,14 @@ def run_evaluation(
debug=False, debug=False,
config=cfg config=cfg
) )
ta_backtester = TradingAgentsBacktester(graph, initial_capital) ta_backtester = TradingAgentsBacktester(graph, initial_capital, output_dir)
ta_portfolio = ta_backtester.backtest(ticker, start_date, end_date, data) ta_portfolio = ta_backtester.backtest(ticker, start_date, end_date, data)
engine.results["TradingAgents"] = ta_portfolio engine.results["TradingAgents"] = ta_portfolio
print("\n✓ TradingAgents backtest complete") print("\n✓ TradingAgents backtest complete")
# Save TradingAgents actions to JSON (in consistent format with baselines)
save_strategy_actions_to_json(ta_portfolio, "TradingAgents", ticker, start_date, end_date, output_dir)
except Exception as e: except Exception as e:
print(f"\n✗ TradingAgents failed: {e}") print(f"\n✗ TradingAgents failed: {e}")
@ -121,35 +178,15 @@ def run_evaluation(
all_metrics[name] = metrics all_metrics[name] = metrics
print_metrics(metrics, name) print_metrics(metrics, name)
comparison_df = create_comparison_table(all_metrics)
print("\n" + "="*80)
print("PERFORMANCE COMPARISON TABLE")
print("="*80)
print(comparison_df.to_string())
print("\n")
comparison_df.to_csv(out / f"{ticker}_comparison.csv")
print(f"Comparison table saved to: {out / f'{ticker}_comparison.csv'}")
# Visuals
print("\n" + "="*80)
print("STEP 5: Generating Visualizations")
print("="*80)
create_summary_report(ticker, engine.results, comparison_df, output_dir)
print("\n" + "="*80) print("\n" + "="*80)
print("EVALUATION COMPLETE") print("EVALUATION COMPLETE")
print("="*80) print("="*80)
print(f"\nResults saved to: {out}") print(f"\nResults saved to: {out}")
print(f" - Comparison table: {ticker}_comparison.csv") print(f"\nDaily actions JSON files saved for:")
print(f" - Cumulative returns plot: {ticker}_cumulative_returns.png") for name in engine.results.keys():
print(f" - Metrics comparison: {ticker}_metrics_comparison.png") print(f"{name}")
if include_tradingagents and "TradingAgents" in engine.results:
print(f" - Transaction history: {ticker}_TradingAgents_transactions.png")
print(f" - Drawdown analysis: {ticker}_drawdown.png")
return engine.results, comparison_df return engine.results, all_metrics
def main(): def main():
@ -158,10 +195,10 @@ def main():
parser.add_argument("--start-date", type=str, required=True, help="Start date (YYYY-MM-DD)") parser.add_argument("--start-date", type=str, required=True, help="Start date (YYYY-MM-DD)")
parser.add_argument("--end-date", type=str, required=True, help="End date (YYYY-MM-DD)") parser.add_argument("--end-date", type=str, required=True, help="End date (YYYY-MM-DD)")
parser.add_argument("--capital", type=float, default=100000, help="Initial capital (default: 100000)") parser.add_argument("--capital", type=float, default=100000, help="Initial capital (default: 100000)")
parser.add_argument("--no-tradingagents", action="store_true", help="Skip TradingAgents") parser.add_argument("--skip-tradingagents", action="store_true", help="Skip TradingAgents evaluation")
parser.add_argument("--output-dir", type=str, default=None, help="Output directory for results") parser.add_argument("--output-dir", type=str, default=None, help="Output directory for results")
parser.add_argument("--deep-llm", type=str, default="gpt-4o-mini", help="Deep thinking LLM model") parser.add_argument("--deep-llm", type=str, default="o4-mini", help="Deep thinking LLM model")
parser.add_argument("--quick-llm", type=str, default="gpt-5-nano", help="Quick thinking LLM model") parser.add_argument("--quick-llm", type=str, default="gpt-4o-mini", help="Quick thinking LLM model")
parser.add_argument("--debate-rounds", type=int, default=1, help="Number of debate rounds (default: 1)") parser.add_argument("--debate-rounds", type=int, default=1, help="Number of debate rounds (default: 1)")
# Used for debugging # Used for debugging
@ -200,7 +237,7 @@ def main():
start_date=args.start_date, start_date=args.start_date,
end_date=args.end_date, end_date=args.end_date,
initial_capital=args.capital, initial_capital=args.capital,
include_tradingagents=not args.no_tradingagents, include_tradingagents=not args.skip_tradingagents,
output_dir=args.output_dir, output_dir=args.output_dir,
config=config config=config
) )

View File

@ -27,3 +27,4 @@ langchain-google-genai
matplotlib matplotlib
seaborn seaborn
numpy numpy
python-dotenv

View File

@ -1,4 +1,12 @@
import os import os
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables from .env file
# Look for .env in the project root (parent of tradingagents directory)
project_root = Path(__file__).parent.parent
dotenv_path = project_root / ".env"
load_dotenv(dotenv_path=dotenv_path)
DEFAULT_CONFIG = { DEFAULT_CONFIG = {
"project_dir": os.path.abspath(os.path.join(os.path.dirname(__file__), ".")), "project_dir": os.path.abspath(os.path.join(os.path.dirname(__file__), ".")),
@ -13,6 +21,7 @@ DEFAULT_CONFIG = {
"deep_think_llm": "o4-mini", "deep_think_llm": "o4-mini",
"quick_think_llm": "gpt-4o-mini", "quick_think_llm": "gpt-4o-mini",
"backend_url": "https://api.openai.com/v1", "backend_url": "https://api.openai.com/v1",
"openai_api_key": os.getenv("OPENAI_API_KEY"), # Load from .env file
# Debate and discussion settings # Debate and discussion settings
"max_debate_rounds": 1, "max_debate_rounds": 1,
"max_risk_discuss_rounds": 1, "max_risk_discuss_rounds": 1,

View File

@ -73,8 +73,16 @@ class TradingAgentsGraph:
# Initialize LLMs # Initialize LLMs
if self.config["llm_provider"].lower() == "openai" or self.config["llm_provider"] == "ollama" or self.config["llm_provider"] == "openrouter": if self.config["llm_provider"].lower() == "openai" or self.config["llm_provider"] == "ollama" or self.config["llm_provider"] == "openrouter":
self.deep_thinking_llm = ChatOpenAI(model=self.config["deep_think_llm"], base_url=self.config["backend_url"]) self.deep_thinking_llm = ChatOpenAI(
self.quick_thinking_llm = ChatOpenAI(model=self.config["quick_think_llm"], base_url=self.config["backend_url"]) model=self.config["deep_think_llm"],
base_url=self.config["backend_url"],
api_key=self.config.get("openai_api_key")
)
self.quick_thinking_llm = ChatOpenAI(
model=self.config["quick_think_llm"],
base_url=self.config["backend_url"],
api_key=self.config.get("openai_api_key")
)
elif self.config["llm_provider"].lower() == "anthropic": elif self.config["llm_provider"].lower() == "anthropic":
self.deep_thinking_llm = ChatAnthropic(model=self.config["deep_think_llm"], base_url=self.config["backend_url"]) self.deep_thinking_llm = ChatAnthropic(model=self.config["deep_think_llm"], base_url=self.config["backend_url"])
self.quick_thinking_llm = ChatAnthropic(model=self.config["quick_think_llm"], base_url=self.config["backend_url"]) self.quick_thinking_llm = ChatAnthropic(model=self.config["quick_think_llm"], base_url=self.config["backend_url"])