feat: improve error handling
This commit is contained in:
parent
51fc6c23de
commit
350c18dc0b
|
|
@ -0,0 +1,42 @@
|
||||||
|
"""Standalone diagnostic script to test a single LLM call with resilience.
|
||||||
|
Run: python debug_llm_call.py --provider openai --model gpt-4o-mini --message "Test message".
|
||||||
|
It will respect environment variables for keys and SSL the same way the graph does.
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
from tradingagents.default_config import DEFAULT_CONFIG
|
||||||
|
from tradingagents.graph.trading_graph import TradingAgentsGraph
|
||||||
|
from langchain_core.messages import HumanMessage
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the market analyst node once against a single user message.

    Command-line flags select the LLM provider, the model (used for both the
    quick-think and deep-think slots) and the message text. The node is then
    called directly with a minimal hand-built state and a short summary of
    the returned state is printed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--provider', default=DEFAULT_CONFIG['llm_provider'])
    cli.add_argument('--model', default=DEFAULT_CONFIG['quick_think_llm'])
    cli.add_argument('--message', default='Say hello and include a short market summary placeholder.')
    options = cli.parse_args()

    # Shallow copy of the defaults with the CLI overrides layered on top;
    # the same model is used for both thinking tiers.
    run_config = dict(
        DEFAULT_CONFIG,
        llm_provider=options.provider,
        quick_think_llm=options.model,
        deep_think_llm=options.model,
    )

    trading_graph = TradingAgentsGraph(config=run_config)

    market_node = trading_graph.graph_setup.analyst_nodes.get('market')
    if not market_node:
        print('Market node not found in graph setup.')
        return

    # Minimal state for the market analyst node.
    initial_state = {
        'trade_date': '2025-09-29',
        'company_of_interest': 'AAPL',
        'messages': [HumanMessage(content=options.message)],
    }

    # Call the node function directly instead of running the whole graph.
    output_state = market_node(initial_state)
    report_text = str(output_state.get('market_report', ''))
    print('Result keys:', list(output_state.keys()))
    print('Market report snippet:', report_text[:500])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
Binary file not shown.
|
|
@ -1,6 +1,7 @@
|
||||||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
|
from tradingagents.agents.utils.llm_resilience import invoke_with_retries
|
||||||
|
|
||||||
|
|
||||||
def create_fundamentals_analyst(llm, toolkit):
|
def create_fundamentals_analyst(llm, toolkit):
|
||||||
|
|
@ -55,13 +56,19 @@ def create_fundamentals_analyst(llm, toolkit):
|
||||||
prompt = prompt.partial(ticker=ticker)
|
prompt = prompt.partial(ticker=ticker)
|
||||||
|
|
||||||
chain = prompt | llm.bind_tools(tools)
|
chain = prompt | llm.bind_tools(tools)
|
||||||
|
try:
|
||||||
result = chain.invoke(state["messages"])
|
result = invoke_with_retries(chain, state["messages"], toolkit.config)
|
||||||
|
except Exception as e: # noqa: BLE001
|
||||||
|
class DummyResult:
|
||||||
|
def __init__(self, content):
|
||||||
|
self.content = content
|
||||||
|
self.tool_calls = []
|
||||||
|
result = DummyResult(f"Fundamentals analyst failed after retries. Error: {e}")
|
||||||
|
|
||||||
report = ""
|
report = ""
|
||||||
|
|
||||||
if len(result.tool_calls) == 0:
|
if getattr(result, 'tool_calls', []) == []:
|
||||||
report = result.content
|
report = getattr(result, 'content', '')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"messages": [result],
|
"messages": [result],
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
|
from tradingagents.agents.utils.llm_resilience import invoke_with_retries
|
||||||
|
|
||||||
|
|
||||||
def create_market_analyst(llm, toolkit):
|
def create_market_analyst(llm, toolkit):
|
||||||
|
|
@ -85,12 +86,22 @@ Bullish and Bearish Candlestick Patterns:
|
||||||
|
|
||||||
chain = prompt | llm.bind_tools(tools)
|
chain = prompt | llm.bind_tools(tools)
|
||||||
|
|
||||||
result = chain.invoke(state["messages"])
|
# Resilient invocation with retries
|
||||||
|
try:
|
||||||
|
result = invoke_with_retries(chain, state["messages"], toolkit.config)
|
||||||
|
except Exception as e: # noqa: BLE001
|
||||||
|
# Provide a graceful degraded response so graph can continue / be logged
|
||||||
|
fallback_content = f"Market analyst failed to retrieve a model response after retries. Error: {e}"
|
||||||
|
class DummyResult:
|
||||||
|
def __init__(self, content):
|
||||||
|
self.content = content
|
||||||
|
self.tool_calls = []
|
||||||
|
result = DummyResult(fallback_content)
|
||||||
|
|
||||||
report = ""
|
report = ""
|
||||||
|
|
||||||
if len(result.tool_calls) == 0:
|
if getattr(result, 'tool_calls', []) == []:
|
||||||
report = result.content
|
report = getattr(result, 'content', '')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"messages": [result],
|
"messages": [result],
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
|
from tradingagents.agents.utils.llm_resilience import invoke_with_retries
|
||||||
|
|
||||||
|
|
||||||
def create_news_analyst(llm, toolkit):
|
def create_news_analyst(llm, toolkit):
|
||||||
|
|
@ -45,12 +46,19 @@ def create_news_analyst(llm, toolkit):
|
||||||
prompt = prompt.partial(ticker=ticker)
|
prompt = prompt.partial(ticker=ticker)
|
||||||
|
|
||||||
chain = prompt | llm.bind_tools(tools)
|
chain = prompt | llm.bind_tools(tools)
|
||||||
result = chain.invoke(state["messages"])
|
try:
|
||||||
|
result = invoke_with_retries(chain, state["messages"], toolkit.config)
|
||||||
|
except Exception as e: # noqa: BLE001
|
||||||
|
class DummyResult:
|
||||||
|
def __init__(self, content):
|
||||||
|
self.content = content
|
||||||
|
self.tool_calls = []
|
||||||
|
result = DummyResult(f"News analyst failed after retries. Error: {e}")
|
||||||
|
|
||||||
report = ""
|
report = ""
|
||||||
|
|
||||||
if len(result.tool_calls) == 0:
|
if getattr(result, 'tool_calls', []) == []:
|
||||||
report = result.content
|
report = getattr(result, 'content', '')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"messages": [result],
|
"messages": [result],
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
|
from tradingagents.agents.utils.llm_resilience import invoke_with_retries
|
||||||
|
|
||||||
|
|
||||||
def create_social_media_analyst(llm, toolkit):
|
def create_social_media_analyst(llm, toolkit):
|
||||||
|
|
@ -44,13 +45,19 @@ def create_social_media_analyst(llm, toolkit):
|
||||||
prompt = prompt.partial(ticker=ticker)
|
prompt = prompt.partial(ticker=ticker)
|
||||||
|
|
||||||
chain = prompt | llm.bind_tools(tools)
|
chain = prompt | llm.bind_tools(tools)
|
||||||
|
try:
|
||||||
result = chain.invoke(state["messages"])
|
result = invoke_with_retries(chain, state["messages"], toolkit.config)
|
||||||
|
except Exception as e: # noqa: BLE001
|
||||||
|
class DummyResult:
|
||||||
|
def __init__(self, content):
|
||||||
|
self.content = content
|
||||||
|
self.tool_calls = []
|
||||||
|
result = DummyResult(f"Social media analyst failed after retries. Error: {e}")
|
||||||
|
|
||||||
report = ""
|
report = ""
|
||||||
|
|
||||||
if len(result.tool_calls) == 0:
|
if getattr(result, 'tool_calls', []) == []:
|
||||||
report = result.content
|
report = getattr(result, 'content', '')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"messages": [result],
|
"messages": [result],
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Any, Callable, Dict
|
||||||
|
from json import JSONDecodeError
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Substrings, matched case-insensitively against ``str(exc)``, that mark an
# error as transient and therefore worth retrying.
_TRANSIENT_KEYWORDS = (
    "timeout",
    "temporarily",
    "rate limit",
    "connection reset",
    "503",
    "502",
    "jsondecodeerror",
)


def invoke_with_retries(chain: Any, messages: Any, config: Dict[str, Any]):
    """Invoke a langchain chain with retries and detailed logging.

    Handles transient HTTP issues and JSON decode errors coming from provider SDKs.

    Args:
        chain: Any object exposing ``invoke(messages)``.
        messages: Payload passed unchanged to ``chain.invoke``.
        config: Settings mapping. ``llm_max_retries`` (default 3) is the total
            number of attempts and ``llm_retry_backoff`` (default 2.0) is the
            exponential base, in seconds, of the delay between attempts.
            ``None`` is treated as an empty mapping.

    Returns:
        Whatever ``chain.invoke(messages)`` returns on the first successful
        attempt.

    Raises:
        Exception: The last exception raised by ``chain.invoke``, re-raised
            unchanged once the attempts are exhausted or as soon as an error
            is judged non-transient.
    """
    settings = config or {}
    # Always make at least one attempt: a zero or negative setting used to
    # skip the loop entirely and end in ``raise None`` (a TypeError).
    attempts = max(1, int(settings.get("llm_max_retries", 3)))
    backoff = settings.get("llm_retry_backoff", 2.0)

    last_err = None
    for attempt in range(1, attempts + 1):
        try:
            return chain.invoke(messages)
        except JSONDecodeError as err:
            # Malformed provider payloads are always treated as retryable.
            last_err = err
            logger.warning(
                "JSONDecodeError on attempt %s/%s: %s", attempt, attempts, err
            )
        except Exception as err:  # noqa: BLE001
            # Classify by message text: provider SDKs raise many different
            # exception types for the same transient network conditions.
            text = str(err).lower()
            transient = any(keyword in text for keyword in _TRANSIENT_KEYWORDS)
            last_err = err
            logger.warning(
                "LLM invocation error (transient=%s) attempt %s/%s: %s",
                transient,
                attempt,
                attempts,
                err,
            )
            if not transient:
                # Non transient -> abort early
                break
        # Exponential backoff, skipped after the final attempt because there
        # is nothing left to wait for.
        if attempt < attempts:
            time.sleep(backoff ** (attempt - 1))

    # All attempts failed
    raise last_err  # propagate last error
|
||||||
|
|
@ -33,4 +33,8 @@ DEFAULT_CONFIG = {
|
||||||
# Proxy settings (if needed)
|
# Proxy settings (if needed)
|
||||||
"http_proxy": os.getenv("HTTP_PROXY"),
|
"http_proxy": os.getenv("HTTP_PROXY"),
|
||||||
"https_proxy": os.getenv("HTTPS_PROXY"),
|
"https_proxy": os.getenv("HTTPS_PROXY"),
|
||||||
|
# LLM resilience settings
|
||||||
|
"llm_max_retries": int(os.getenv("LLM_MAX_RETRIES", "3")),
|
||||||
|
"llm_retry_backoff": float(os.getenv("LLM_RETRY_BACKOFF", "2")), # seconds exponential base
|
||||||
|
"debug_http": os.getenv("DEBUG_HTTP", "false").lower() in ("1", "true", "yes"),
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue