From 42ef175ddb124826dce7c0f93251a2e999a84a45 Mon Sep 17 00:00:00 2001
From: MarkLo <yao920127@gmail.com>
Date: Mon, 1 Dec 2025 03:50:43 +0800
Subject: [PATCH] feat: add retry mechanism for Anthropic API 529 errors

- Add tenacity-based retry logic to Research Manager
- Configure exponential backoff (2-10s) for OverloadedError
- Maximum 3 attempts in total (up to 2 retries) with warning-level logging before each retry
- Improves system stability against temporary API overload
- Fix: correct import path for OverloadedError from anthropic._exceptions
---
 .../agents/managers/research_manager.py       | 41 ++++++++++++++++++-
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/tradingagents/agents/managers/research_manager.py b/tradingagents/agents/managers/research_manager.py
index 8e920d5d..953e179a 100644
--- a/tradingagents/agents/managers/research_manager.py
+++ b/tradingagents/agents/managers/research_manager.py
@@ -1,8 +1,20 @@
 # -*- coding: utf-8 -*-
 import time
 import json
+import logging
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    retry_if_exception_type,
+    before_sleep_log
+)
+from anthropic._exceptions import OverloadedError
 from tradingagents.agents.utils.output_filter import fix_common_llm_errors, validate_and_warn
 
+# 設置日誌記錄器
+logger = logging.getLogger(__name__)
+
 
 def create_research_manager(llm, memory):
     """
@@ -99,9 +111,34 @@ def create_research_manager(llm, memory):
 
 請提供專業且可執行的投資決策報告。"""
         
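+        # Define an LLM call wrapper with retry handling.
+        # Only Anthropic OverloadedError (529) triggers a retry.
+        # Config: at most 3 attempts in total (up to 2 retries); exponential backoff clamped to 2-10 s.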
+        @retry(
+            retry=retry_if_exception_type(OverloadedError),
+            wait=wait_exponential(multiplier=1, min=2, max=10),
+            stop=stop_after_attempt(3),
+            before_sleep=before_sleep_log(logger, logging.WARNING),
+            reraise=True,  # surface the last OverloadedError instead of tenacity.RetryError
+        )
+        def invoke_llm_with_retry(llm_instance, prompt_text):
+            """Invoke the LLM, retrying on OverloadedError (at most 3 attempts in total).
+
+            Args:
+                llm_instance: Object whose ``invoke`` method is called.
+                prompt_text: Prompt passed to ``llm_instance.invoke``.
+
+            Returns:
+                Whatever ``llm_instance.invoke(prompt_text)`` returns.
+
+            Raises:
+                OverloadedError: If the final attempt still fails with it.
+            """
+            logger.info("正在調用 Research Manager LLM...")
+            return llm_instance.invoke(prompt_text)
         
-        # 呼叫 LLM 生成回應
-        response = llm.invoke(prompt)
+        # 使用帶重試機制的函數調用 LLM
+        response = invoke_llm_with_retry(llm, prompt)
         
         # CRITICAL FIX: Apply output filtering
         response.content = fix_common_llm_errors(response.content)