From 2cdada6300efee015f19ab68887c72a1ece92b19 Mon Sep 17 00:00:00 2001 From: Zhigong Liu Date: Sat, 18 Apr 2026 22:30:31 -0400 Subject: [PATCH] fix: suppress memory injection when memory is empty (hallucination guard, closes #572) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When past_memories is empty, four of the five agents previously injected an empty string into their prompts (the trader injected the placeholder "No past memories found." instead) while their prompts still asked the LLM to draw on past reflections (e.g. "address reflections and learn from lessons and mistakes you made in the past") — causing the LLM to fabricate lessons on the first run. Each agent now conditionally builds its memory section only when past_memories is non-empty, so the injection and its instruction are both absent when there is nothing to recall. Also fixes import ordering in memory.py (logger after imports). Tests: tests/test_hallucination_guard.py covers empty and populated memory for all five agents (bull, bear, trader, research manager, portfolio manager). Companion to #563 (memory persistence). Co-Authored-By: Claude Sonnet 4.6 --- tests/test_hallucination_guard.py | 204 ++++++++++++++++++ .../agents/managers/portfolio_manager.py | 10 +- .../agents/managers/research_manager.py | 18 +- .../agents/researchers/bear_researcher.py | 13 +- .../agents/researchers/bull_researcher.py | 13 +- tradingagents/agents/trader/trader.py | 14 +- tradingagents/agents/utils/memory.py | 5 +- 7 files changed, 247 insertions(+), 30 deletions(-) create mode 100644 tests/test_hallucination_guard.py diff --git a/tests/test_hallucination_guard.py b/tests/test_hallucination_guard.py new file mode 100644 index 00000000..bcdc1772 --- /dev/null +++ b/tests/test_hallucination_guard.py @@ -0,0 +1,204 @@ +"""Tests for memory hallucination guard across all five agents. + +Verifies that: +- Memory section headers are ABSENT from prompts when memory is empty. +- Memory content IS injected when memory is populated. + +No LLM API calls are made — llm.invoke() is mocked. 
+""" + +from unittest.mock import MagicMock + +from tradingagents.agents.researchers.bull_researcher import create_bull_researcher +from tradingagents.agents.researchers.bear_researcher import create_bear_researcher +from tradingagents.agents.trader.trader import create_trader +from tradingagents.agents.managers.research_manager import create_research_manager +from tradingagents.agents.managers.portfolio_manager import create_portfolio_manager + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + +def _make_state(): + return { + "investment_debate_state": { + "history": "", + "bull_history": "", + "bear_history": "", + "current_response": "", + "count": 0, + }, + "market_report": "market data", + "sentiment_report": "sentiment data", + "news_report": "news data", + "fundamentals_report": "fundamentals data", + "company_of_interest": "AAPL", + "trade_date": "2024-01-15", + "investment_plan": "buy AAPL", + "trader_investment_plan": "", + "risk_debate_state": { + "history": "", + "aggressive_history": "", + "conservative_history": "", + "neutral_history": "", + "judge_decision": "", + "count": 0, + "latest_speaker": "", + "current_aggressive_response": "", + "current_conservative_response": "", + "current_neutral_response": "", + }, + } + + +def _mock_llm(): + llm = MagicMock() + llm.invoke.return_value = MagicMock(content="mocked response") + return llm + + +def _empty_memory(): + m = MagicMock() + m.get_memories.return_value = [] + return m + + +def _populated_memory(lesson="Past lesson: watch macro risk"): + m = MagicMock() + m.get_memories.return_value = [{"recommendation": lesson, "similarity_score": 0.9}] + return m + + +# --------------------------------------------------------------------------- +# Bull Researcher +# --------------------------------------------------------------------------- + +def 
test_bull_omits_memory_section_when_empty(): + """No reflections header or instruction when memory is empty.""" + llm = _mock_llm() + node = create_bull_researcher(llm, _empty_memory()) + node(_make_state()) + + prompt = llm.invoke.call_args[0][0] + assert "Reflections from similar situations" not in prompt + assert "address reflections" not in prompt.lower() + assert "learn from lessons and mistakes" not in prompt.lower() + + +def test_bull_includes_memory_section_when_populated(): + """Lesson text appears in prompt when memory is populated.""" + llm = _mock_llm() + node = create_bull_researcher(llm, _populated_memory("Reduce tech exposure on rate hikes")) + node(_make_state()) + + prompt = llm.invoke.call_args[0][0] + assert "Reduce tech exposure on rate hikes" in prompt + assert "Reflections from similar situations" in prompt + + +# --------------------------------------------------------------------------- +# Bear Researcher +# --------------------------------------------------------------------------- + +def test_bear_omits_memory_section_when_empty(): + """No reflections header or instruction when memory is empty.""" + llm = _mock_llm() + node = create_bear_researcher(llm, _empty_memory()) + node(_make_state()) + + prompt = llm.invoke.call_args[0][0] + assert "Reflections from similar situations" not in prompt + assert "address reflections" not in prompt.lower() + assert "learn from lessons and mistakes" not in prompt.lower() + + +def test_bear_includes_memory_section_when_populated(): + """Lesson text appears in prompt when memory is populated.""" + llm = _mock_llm() + node = create_bear_researcher(llm, _populated_memory("Overestimated resilience in 2022")) + node(_make_state()) + + prompt = llm.invoke.call_args[0][0] + assert "Overestimated resilience in 2022" in prompt + assert "Reflections from similar situations" in prompt + + +# --------------------------------------------------------------------------- +# Trader +# 
--------------------------------------------------------------------------- + +def test_trader_omits_reflection_clause_when_empty(): + """No reflection text or 'No past memories found.' in system message when empty.""" + llm = _mock_llm() + node = create_trader(llm, _empty_memory()) + node(_make_state()) + + messages = llm.invoke.call_args[0][0] + system_content = messages[0]["content"] + assert "No past memories found" not in system_content + assert "Here are reflections" not in system_content + assert "Apply lessons from past decisions" not in system_content + + +def test_trader_includes_reflection_clause_when_populated(): + """Lesson text appears in system message when memory is populated.""" + llm = _mock_llm() + node = create_trader(llm, _populated_memory("Avoid chasing momentum tops")) + node(_make_state()) + + messages = llm.invoke.call_args[0][0] + system_content = messages[0]["content"] + assert "Avoid chasing momentum tops" in system_content + assert "Apply lessons from past decisions" in system_content + + +# --------------------------------------------------------------------------- +# Research Manager +# --------------------------------------------------------------------------- + +def test_research_manager_omits_memory_section_when_empty(): + """No past reflections header when memory is empty.""" + llm = _mock_llm() + node = create_research_manager(llm, _empty_memory()) + node(_make_state()) + + prompt = llm.invoke.call_args[0][0] + assert "Here are your past reflections on mistakes" not in prompt + assert "Take into account your past mistakes" not in prompt + + +def test_research_manager_includes_memory_section_when_populated(): + """Lesson text and header appear in prompt when memory is populated.""" + llm = _mock_llm() + node = create_research_manager(llm, _populated_memory("Missed earnings surprise signal")) + node(_make_state()) + + prompt = llm.invoke.call_args[0][0] + assert "Missed earnings surprise signal" in prompt + assert "Here are your 
past reflections on mistakes" in prompt + + +# --------------------------------------------------------------------------- +# Portfolio Manager +# --------------------------------------------------------------------------- + +def test_portfolio_manager_omits_lessons_line_when_empty(): + """No 'Lessons from past decisions' line when memory is empty.""" + llm = _mock_llm() + node = create_portfolio_manager(llm, _empty_memory()) + node(_make_state()) + + prompt = llm.invoke.call_args[0][0] + assert "Lessons from past decisions" not in prompt + + +def test_portfolio_manager_includes_lessons_line_when_populated(): + """Lesson text and label appear in prompt when memory is populated.""" + llm = _mock_llm() + node = create_portfolio_manager(llm, _populated_memory("Size down in low-liquidity names")) + node(_make_state()) + + prompt = llm.invoke.call_args[0][0] + assert "Size down in low-liquidity names" in prompt + assert "Lessons from past decisions" in prompt diff --git a/tradingagents/agents/managers/portfolio_manager.py b/tradingagents/agents/managers/portfolio_manager.py index 6c69ae9f..e2c8553c 100644 --- a/tradingagents/agents/managers/portfolio_manager.py +++ b/tradingagents/agents/managers/portfolio_manager.py @@ -18,9 +18,11 @@ def create_portfolio_manager(llm, memory): curr_situation = f"{market_research_report}\n\n{sentiment_report}\n\n{news_report}\n\n{fundamentals_report}" past_memories = memory.get_memories(curr_situation, n_matches=2) - past_memory_str = "" - for i, rec in enumerate(past_memories, 1): - past_memory_str += rec["recommendation"] + "\n\n" + if past_memories: + past_memory_str = "".join(rec["recommendation"] + "\n\n" for rec in past_memories) + lessons_line = f"- Lessons from past decisions: **{past_memory_str}**\n" + else: + lessons_line = "" prompt = f"""As the Portfolio Manager, synthesize the risk analysts' debate and deliver the final trading decision. 
@@ -38,7 +40,7 @@ def create_portfolio_manager(llm, memory): **Context:** - Research Manager's investment plan: **{research_plan}** - Trader's transaction proposal: **{trader_plan}** -- Lessons from past decisions: **{past_memory_str}** +{lessons_line} **Required Output Structure:** 1. **Rating**: State one of Buy / Overweight / Hold / Underweight / Sell. diff --git a/tradingagents/agents/managers/research_manager.py b/tradingagents/agents/managers/research_manager.py index 5b4b4fdc..d2443d27 100644 --- a/tradingagents/agents/managers/research_manager.py +++ b/tradingagents/agents/managers/research_manager.py @@ -16,9 +16,15 @@ def create_research_manager(llm, memory): curr_situation = f"{market_research_report}\n\n{sentiment_report}\n\n{news_report}\n\n{fundamentals_report}" past_memories = memory.get_memories(curr_situation, n_matches=2) - past_memory_str = "" - for i, rec in enumerate(past_memories, 1): - past_memory_str += rec["recommendation"] + "\n\n" + if past_memories: + past_memory_str = "".join(rec["recommendation"] + "\n\n" for rec in past_memories) + memory_section = ( + "Take into account your past mistakes on similar situations." + " Use these insights to refine your decision-making and ensure you are learning and improving.\n\n" + f"Here are your past reflections on mistakes:\n\"{past_memory_str}\"\n" + ) + else: + memory_section = "" prompt = f"""As the portfolio manager and debate facilitator, your role is to critically evaluate this round of debate and make a definitive decision: align with the bear analyst, the bull analyst, or choose Hold only if it is strongly justified based on the arguments presented. @@ -29,11 +35,9 @@ Additionally, develop a detailed investment plan for the trader. This should inc Your Recommendation: A decisive stance supported by the most convincing arguments. Rationale: An explanation of why these arguments lead to your conclusion. Strategic Actions: Concrete steps for implementing the recommendation. 
-Take into account your past mistakes on similar situations. Use these insights to refine your decision-making and ensure you are learning and improving. Present your analysis conversationally, as if speaking naturally, without special formatting. - -Here are your past reflections on mistakes: -\"{past_memory_str}\" +Present your analysis conversationally, as if speaking naturally, without special formatting. +{memory_section} {instrument_context} Here is the debate: diff --git a/tradingagents/agents/researchers/bear_researcher.py b/tradingagents/agents/researchers/bear_researcher.py index a44212dc..2143b237 100644 --- a/tradingagents/agents/researchers/bear_researcher.py +++ b/tradingagents/agents/researchers/bear_researcher.py @@ -15,9 +15,12 @@ def create_bear_researcher(llm, memory): curr_situation = f"{market_research_report}\n\n{sentiment_report}\n\n{news_report}\n\n{fundamentals_report}" past_memories = memory.get_memories(curr_situation, n_matches=2) - past_memory_str = "" - for i, rec in enumerate(past_memories, 1): - past_memory_str += rec["recommendation"] + "\n\n" + memory_section = ( + "Reflections from similar situations and lessons learned:\n" + + "".join(rec["recommendation"] + "\n\n" for rec in past_memories) + + "\nUse these past reflections to strengthen your argument." + if past_memories else "" + ) prompt = f"""You are a Bear Analyst making the case against investing in the stock. Your goal is to present a well-reasoned argument emphasizing risks, challenges, and negative indicators. Leverage the provided research and data to highlight potential downsides and counter bullish arguments effectively. 
@@ -37,8 +40,8 @@ Latest world affairs news: {news_report} Company fundamentals report: {fundamentals_report} Conversation history of the debate: {history} Last bull argument: {current_response} -Reflections from similar situations and lessons learned: {past_memory_str} -Use this information to deliver a compelling bear argument, refute the bull's claims, and engage in a dynamic debate that demonstrates the risks and weaknesses of investing in the stock. You must also address reflections and learn from lessons and mistakes you made in the past. +{memory_section} +Use this information to deliver a compelling bear argument, refute the bull's claims, and engage in a dynamic debate that demonstrates the risks and weaknesses of investing in the stock. """ response = llm.invoke(prompt) diff --git a/tradingagents/agents/researchers/bull_researcher.py b/tradingagents/agents/researchers/bull_researcher.py index d23d4d76..53a7e6c9 100644 --- a/tradingagents/agents/researchers/bull_researcher.py +++ b/tradingagents/agents/researchers/bull_researcher.py @@ -15,9 +15,12 @@ def create_bull_researcher(llm, memory): curr_situation = f"{market_research_report}\n\n{sentiment_report}\n\n{news_report}\n\n{fundamentals_report}" past_memories = memory.get_memories(curr_situation, n_matches=2) - past_memory_str = "" - for i, rec in enumerate(past_memories, 1): - past_memory_str += rec["recommendation"] + "\n\n" + memory_section = ( + "Reflections from similar situations and lessons learned:\n" + + "".join(rec["recommendation"] + "\n\n" for rec in past_memories) + + "\nUse these past reflections to strengthen your argument." + if past_memories else "" + ) prompt = f"""You are a Bull Analyst advocating for investing in the stock. Your task is to build a strong, evidence-based case emphasizing growth potential, competitive advantages, and positive market indicators. Leverage the provided research and data to address concerns and counter bearish arguments effectively. 
@@ -35,8 +38,8 @@ Latest world affairs news: {news_report} Company fundamentals report: {fundamentals_report} Conversation history of the debate: {history} Last bear argument: {current_response} -Reflections from similar situations and lessons learned: {past_memory_str} -Use this information to deliver a compelling bull argument, refute the bear's concerns, and engage in a dynamic debate that demonstrates the strengths of the bull position. You must also address reflections and learn from lessons and mistakes you made in the past. +{memory_section} +Use this information to deliver a compelling bull argument, refute the bear's concerns, and engage in a dynamic debate that demonstrates the strengths of the bull position. """ response = llm.invoke(prompt) diff --git a/tradingagents/agents/trader/trader.py b/tradingagents/agents/trader/trader.py index 07e9f262..1ef8501e 100644 --- a/tradingagents/agents/trader/trader.py +++ b/tradingagents/agents/trader/trader.py @@ -16,12 +16,12 @@ def create_trader(llm, memory): curr_situation = f"{market_research_report}\n\n{sentiment_report}\n\n{news_report}\n\n{fundamentals_report}" past_memories = memory.get_memories(curr_situation, n_matches=2) - past_memory_str = "" - if past_memories: - for i, rec in enumerate(past_memories, 1): - past_memory_str += rec["recommendation"] + "\n\n" - else: - past_memory_str = "No past memories found." + reflection_clause = ( + " Apply lessons from past decisions to strengthen your analysis." + " Here are reflections from similar situations you traded in and the lessons learned:\n" + + "".join(rec["recommendation"] + "\n\n" for rec in past_memories) + if past_memories else "" + ) context = { "role": "user", @@ -31,7 +31,7 @@ def create_trader(llm, memory): messages = [ { "role": "system", - "content": f"""You are a trading agent analyzing market data to make investment decisions. Based on your analysis, provide a specific recommendation to buy, sell, or hold. 
End with a firm decision and always conclude your response with 'FINAL TRANSACTION PROPOSAL: **BUY/HOLD/SELL**' to confirm your recommendation. Apply lessons from past decisions to strengthen your analysis. Here are reflections from similar situations you traded in and the lessons learned: {past_memory_str}""", + "content": f"""You are a trading agent analyzing market data to make investment decisions. Based on your analysis, provide a specific recommendation to buy, sell, or hold. End with a firm decision and always conclude your response with 'FINAL TRANSACTION PROPOSAL: **BUY/HOLD/SELL**' to confirm your recommendation.{reflection_clause}""", }, context, ] diff --git a/tradingagents/agents/utils/memory.py b/tradingagents/agents/utils/memory.py index 4ee6fab6..04cf0813 100644 --- a/tradingagents/agents/utils/memory.py +++ b/tradingagents/agents/utils/memory.py @@ -6,13 +6,14 @@ no token limits, works offline with any LLM provider. import json import logging +import re import tempfile from pathlib import Path -logger = logging.getLogger(__name__) from rank_bm25 import BM25Okapi from typing import List, Tuple -import re + +logger = logging.getLogger(__name__) class FinancialSituationMemory: