This commit is contained in:
jiwoomap 2026-01-05 00:09:02 +09:00 committed by GitHub
commit 21f856b8d8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 244 additions and 53 deletions

View File

@ -1,40 +1,19 @@
from .utils.agent_utils import create_msg_delete
from .utils.agent_states import AgentState, InvestDebateState, RiskDebateState
from .utils.memory import FinancialSituationMemory
from .analysts.fundamentals_analyst import create_fundamentals_analyst
from .analysts.market_analyst import create_market_analyst from .analysts.market_analyst import create_market_analyst
from .analysts.news_analyst import create_news_analyst from .analysts.news_analyst import create_news_analyst
from .analysts.fundamentals_analyst import create_fundamentals_analyst
from .analysts.social_media_analyst import create_social_media_analyst from .analysts.social_media_analyst import create_social_media_analyst
from .researchers.bear_researcher import create_bear_researcher
from .researchers.bull_researcher import create_bull_researcher from .researchers.bull_researcher import create_bull_researcher
from .researchers.bear_researcher import create_bear_researcher
from .managers.research_manager import create_research_manager
from .managers.risk_manager import create_risk_manager
from .managers.fact_checker import create_fact_checker # Added
from .trader.trader import create_trader
from .risk_mgmt.aggresive_debator import create_risky_debator from .risk_mgmt.aggresive_debator import create_risky_debator
from .risk_mgmt.conservative_debator import create_safe_debator from .risk_mgmt.conservative_debator import create_safe_debator
from .risk_mgmt.neutral_debator import create_neutral_debator from .risk_mgmt.neutral_debator import create_neutral_debator
from .managers.research_manager import create_research_manager from .utils.agent_utils import create_msg_delete
from .managers.risk_manager import create_risk_manager
from .trader.trader import create_trader
__all__ = [
"FinancialSituationMemory",
"AgentState",
"create_msg_delete",
"InvestDebateState",
"RiskDebateState",
"create_bear_researcher",
"create_bull_researcher",
"create_research_manager",
"create_fundamentals_analyst",
"create_market_analyst",
"create_neutral_debator",
"create_news_analyst",
"create_risky_debator",
"create_risk_manager",
"create_safe_debator",
"create_social_media_analyst",
"create_trader",
]

View File

@ -0,0 +1,205 @@
from langchain_core.messages import AIMessage
import json
import re
import requests
import warnings
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from concurrent.futures import ThreadPoolExecutor
# Suppress only the single warning from urllib3 needed.
warnings.simplefilter('ignore', InsecureRequestWarning)
def verify_url(url):
    """Probe *url* with an HTTP GET and classify how reachable it is.

    Args:
        url: The URL string to check.

    Returns:
        A ``(url, status)`` tuple, where ``status`` is one of:

        * ``"VALID"`` for a 2xx/3xx status code,
        * ``"NOT FOUND (404)"``,
        * ``"VALID (Protected/403)"``,
        * ``"ACCESSIBLE (Status: <code>)"`` for any other status code,
        * ``"ERROR (Could not access: <reason>)"`` when the request failed.

    This function never raises: any failure is reported through the status
    string so that one bad link cannot abort a batch of checks.
    """
    # Browser-like headers; chosen because they were found effective for
    # getting a real answer from sites that reject bare scripted clients.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'cross-site',
        'Sec-Fetch-User': '?1',
        'Cache-Control': 'max-age=0',
    }

    try:
        # stream=True fetches only the status line and headers, not the body.
        # A streamed response keeps its connection checked out until it is
        # closed, so it is scoped with ``with`` to release it right away.
        # SECURITY NOTE: verify=False (the equivalent of ``curl -k``) turns
        # off TLS certificate validation; acceptable only because nothing
        # from the response body is trusted or read here.
        with requests.get(
            url, headers=headers, timeout=15, stream=True, verify=False
        ) as response:
            status_code = response.status_code
    except Exception as e:
        # Deliberately broad: this is a best-effort probe and every failure
        # (DNS, timeout, malformed URL, ...) maps to an ERROR status.
        return url, f"ERROR (Could not access: {e})"

    if 200 <= status_code < 400:
        return url, "VALID"
    if status_code == 404:
        return url, "NOT FOUND (404)"
    if status_code == 403:
        # Treat 403 as potentially accessible but blocked by a WAF.
        return url, "VALID (Protected/403)"
    return url, f"ACCESSIBLE (Status: {status_code})"
def get_unique_urls(text):
    """Extract the distinct HTTP(S) URLs mentioned in *text*.

    Args:
        text: Free-form text to scan. ``None`` or an empty string is allowed.

    Returns:
        A list of URL strings without duplicates, ordered by first
        appearance in *text*. Returns ``[]`` when *text* is falsy or
        contains no URL.
    """
    if not text:
        return []

    # Simple regex to find URLs
    urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', text)

    # The pattern is greedy about punctuation, so strip the closing
    # characters that usually belong to the surrounding sentence or markup.
    # dict.fromkeys de-duplicates while preserving first-seen order; a set
    # would return the URLs in hash order, which differs between runs and
    # makes the downstream report non-reproducible.
    return list(dict.fromkeys(url.rstrip(').,;]') for url in urls))
def check_urls_and_get_data(text, source_label):
    """Verify every URL found in *text* and tag each result with its origin.

    Args:
        text: Text to scan for URLs.
        source_label: Label stored under ``"source"`` in every result.

    Returns:
        A list of ``{"url", "status", "source"}`` dicts, one per unique URL
        returned by ``get_unique_urls``; an empty list when there are none.
    """
    candidates = get_unique_urls(text)
    if not candidates:
        return []

    # Limit max workers to avoid being flagged as DoS
    with ThreadPoolExecutor(max_workers=5) as pool:
        # map() yields results in submission order, matching ``candidates``.
        outcomes = list(pool.map(verify_url, candidates))

    return [
        {"url": checked_url, "status": verdict, "source": source_label}
        for checked_url, verdict in outcomes
    ]
def create_fact_checker(llm):
    """Build the fact-checker node bound to *llm*.

    Args:
        llm: Model object exposing ``invoke(prompt)`` whose result has a
            ``content`` attribute holding the reply text.

    Returns:
        A ``fact_checker_node(state) -> dict`` callable.
    """

    def fact_checker_node(state) -> dict:
        """Fact-check the latest debate response and record link checks.

        Reads ``state["investment_debate_state"]`` plus the four analyst
        reports, verifies the URLs cited in the news report and in the
        latest response, asks the LLM to compare the response against the
        reports, and returns an updated copy of the debate state.

        Returns:
            ``{}`` when there is no current response to check; otherwise
            ``{"investment_debate_state": new_state}`` where ``new_state``
            always carries the accumulated ``verified_urls`` and, depending
            on the verdict, an annotated ``current_response``/``history``.
        """
        investment_debate_state = state["investment_debate_state"]
        current_response = investment_debate_state.get("current_response", "")

        # Reports to verify against
        market_research_report = state["market_report"]
        sentiment_report = state["sentiment_report"]
        news_report = state["news_report"]
        fundamentals_report = state["fundamentals_report"]

        # If there's no response to check, pass
        if not current_response:
            return {}

        # Verify URLs cited by the news report and by the debate speaker.
        verified_data = [
            *check_urls_and_get_data(news_report, "News Analyst"),
            *check_urls_and_get_data(current_response, "Debate Speaker"),
        ]

        # Deduplicate by URL: keep the first occurrence and append the label
        # of any additional source that cited the same link.
        unique_verified_map = {}
        for item in verified_data:
            known = unique_verified_map.setdefault(item["url"], item)
            if known is not item and item["source"] not in known["source"]:
                known["source"] += f", {item['source']}"
        final_verified_list = list(unique_verified_map.values())

        # Generate text report for LLM prompt
        url_check_report = ""
        if final_verified_list:
            url_check_report = "\n[URL Verification Report]\n" + "".join(
                f"- {item['url']}: {item['status']} (Source: {item['source']})\n"
                for item in final_verified_list
            )
            print(f"DEBUG: URL Verification completed.\n{url_check_report}")

        prompt = f"""You are a strict Fact Checker for a financial analysis team.
Your job is to verify the claims made in the following statement against the provided source reports AND verify the validity of the sources.
Statement to Verify:
"{current_response}"
Source Reports:
1. Market Research: {market_research_report}
2. Sentiment Report: {sentiment_report}
3. News Report: {news_report}
4. Fundamentals Report: {fundamentals_report}
URL Verification Status (Physical Check of Links):
{url_check_report}
Instructions:
1. Extract every factual claim (numbers, dates, specific news events) from the statement.
2. Check if each claim exists in the Source Reports.
3. **CRITICAL**: Check the "URL Verification Status" section. If the statement relies on a news article whose URL is marked as "NOT FOUND", "ERROR", or "INVALID", you MUST flag this as a potential invalid source or deleted article.
4. If a claim is NOT found or contradicts the reports, flag it as a HALLUCINATION.
5. If the statement is mostly opinion/analysis, it is acceptable.
6. **ALWAYS** list the Verified Sources (URLs) at the end of your response, even if verified.
Output Format:
If all facts are supported and sources are valid:
"VERIFIED: The statement is consistent with the provided data and sources appear valid.
[Verified Sources]
- [URL1] (Status: VALID)
...
"
If errors are found:
"CORRECTION NEEDED:
- Claim: [Claim] -> Error: [Not found / Contradiction / Source URL Invalid or Deleted]
...
[Verified Sources]
- [URL1] (Status: ...)
...
"
Be extremely strict about numbers. If revenue is 10B in reports but statement says 12B, flag it.
Be strict about source validity. If a key argument is based on a broken link, flag it.
"""
        response = llm.invoke(prompt)
        check_result = response.content

        # Log the fact check result
        print(f"\n\n[Fact Checker]:\n{check_result}\n")

        # Work on a copy so the incoming debate state is not mutated.
        new_state = investment_debate_state.copy()

        # Accumulate unique URLs seen so far; a URL re-checked in this round
        # replaces its earlier entry so the latest status wins. ``or []``
        # tolerates a state where the key is present but unset.
        existing_url_map = {
            item["url"]: item for item in (new_state.get("verified_urls") or [])
        }
        existing_url_map.update(unique_verified_map)
        new_state["verified_urls"] = list(existing_url_map.values())

        # Read history defensively: a debate state without a "history" key
        # must not turn a fact-check annotation into a KeyError.
        history = new_state.get("history", "")
        if "CORRECTION NEEDED" in check_result:
            new_state["current_response"] = (
                f"{current_response}\n\n[SYSTEM NOTE: Fact Check Warning]\n{check_result}"
            )
            new_state["history"] = history + f"\n\n[Fact Check Warning]: {check_result}"
        elif "VERIFIED" in check_result and url_check_report:
            new_state["history"] = history + f"\n\n[Fact Checker]: {check_result}"

        # Even if no text update, we return state to save verified_urls
        return {"investment_debate_state": new_state}

    return fact_checker_node

View File

@ -31,21 +31,21 @@ Strategic Actions: Concrete steps for implementing the recommendation.
Take into account your past mistakes on similar situations. Use these insights to refine your decision-making and ensure you are learning and improving. Present your analysis conversationally, as if speaking naturally, without special formatting. Take into account your past mistakes on similar situations. Use these insights to refine your decision-making and ensure you are learning and improving. Present your analysis conversationally, as if speaking naturally, without special formatting.
Here are your past reflections on mistakes: Here are your past reflections on mistakes:
\"{past_memory_str}\" "{past_memory_str}"
Here is the debate: Here is the debate:
Debate History: Debate History:
{history}""" {history}"""
response = llm.invoke(prompt) response = llm.invoke(prompt)
new_investment_debate_state = { # Log the manager's decision
print(f"\n\n[Research Manager Decision]\n{response.content}\n")
new_investment_debate_state = investment_debate_state.copy()
new_investment_debate_state.update({
"judge_decision": response.content, "judge_decision": response.content,
"history": investment_debate_state.get("history", ""),
"bear_history": investment_debate_state.get("bear_history", ""),
"bull_history": investment_debate_state.get("bull_history", ""),
"current_response": response.content, "current_response": response.content,
"count": investment_debate_state["count"], })
}
return { return {
"investment_debate_state": new_investment_debate_state, "investment_debate_state": new_investment_debate_state,

View File

@ -1,4 +1,4 @@
from typing import Annotated, Sequence from typing import Annotated, Sequence, List, Dict
from datetime import date, timedelta, datetime from datetime import date, timedelta, datetime
from typing_extensions import TypedDict, Optional from typing_extensions import TypedDict, Optional
from langchain_openai import ChatOpenAI from langchain_openai import ChatOpenAI
@ -19,6 +19,7 @@ class InvestDebateState(TypedDict):
current_response: Annotated[str, "Latest response"] # Last response current_response: Annotated[str, "Latest response"] # Last response
judge_decision: Annotated[str, "Final judge decision"] # Last response judge_decision: Annotated[str, "Final judge decision"] # Last response
count: Annotated[int, "Length of the current conversation"] # Conversation length count: Annotated[int, "Length of the current conversation"] # Conversation length
verified_urls: Annotated[List[Dict], "List of verified URLs with status and source"]
# Risk management team state # Risk management team state

View File

@ -21,7 +21,9 @@ def get_google_news(
for news in news_results: for news in news_results:
news_str += ( news_str += (
f"### {news['title']} (source: {news['source']}) \n\n{news['snippet']}\n\n" f"### {news['title']} (source: {news['source']}) \n"
f"URL: {news['link']}\n"
f"{news['snippet']}\n\n"
) )
if len(news_results) == 0: if len(news_results) == 0:

View File

@ -110,7 +110,9 @@ def get_finnhub_news(
continue continue
for entry in data: for entry in data:
current_news = ( current_news = (
"### " + entry["headline"] + f" ({day})" + "\n" + entry["summary"] f"### {entry['headline']} ({day})\n"
f"URL: {entry.get('url', 'N/A')}\n"
f"{entry['summary']}"
) )
combined_result += current_news + "\n\n" combined_result += current_news + "\n\n"
@ -411,9 +413,9 @@ def get_reddit_global_news(
news_str = "" news_str = ""
for post in posts: for post in posts:
if post["content"] == "": if post["content"] == "":
news_str += f"### {post['title']}\n\n" news_str += f"### {post['title']}\nURL: {post.get('url', 'N/A')}\n\n"
else: else:
news_str += f"### {post['title']}\n\n{post['content']}\n\n" news_str += f"### {post['title']}\nURL: {post.get('url', 'N/A')}\n\n{post['content']}\n\n"
return f"## Global News Reddit, from {before} to {curr_date}:\n{news_str}" return f"## Global News Reddit, from {before} to {curr_date}:\n{news_str}"
@ -468,8 +470,8 @@ def get_reddit_company_news(
news_str = "" news_str = ""
for post in posts: for post in posts:
if post["content"] == "": if post["content"] == "":
news_str += f"### {post['title']}\n\n" news_str += f"### {post['title']}\nURL: {post.get('url', 'N/A')}\n\n"
else: else:
news_str += f"### {post['title']}\n\n{post['content']}\n\n" news_str += f"### {post['title']}\nURL: {post.get('url', 'N/A')}\n\n{post['content']}\n\n"
return f"##{query} News Reddit, from {start_date} to {end_date}:\n\n{news_str}" return f"##{query} News Reddit, from {start_date} to {end_date}:\n\n{news_str}"

View File

@ -92,6 +92,8 @@ class GraphSetup:
bear_researcher_node = create_bear_researcher( bear_researcher_node = create_bear_researcher(
self.quick_thinking_llm, self.bear_memory self.quick_thinking_llm, self.bear_memory
) )
fact_checker_node = create_fact_checker(self.quick_thinking_llm)
research_manager_node = create_research_manager( research_manager_node = create_research_manager(
self.deep_thinking_llm, self.invest_judge_memory self.deep_thinking_llm, self.invest_judge_memory
) )
@ -119,6 +121,7 @@ class GraphSetup:
# Add other nodes # Add other nodes
workflow.add_node("Bull Researcher", bull_researcher_node) workflow.add_node("Bull Researcher", bull_researcher_node)
workflow.add_node("Bear Researcher", bear_researcher_node) workflow.add_node("Bear Researcher", bear_researcher_node)
workflow.add_node("Fact Checker", fact_checker_node)
workflow.add_node("Research Manager", research_manager_node) workflow.add_node("Research Manager", research_manager_node)
workflow.add_node("Trader", trader_node) workflow.add_node("Trader", trader_node)
workflow.add_node("Risky Analyst", risky_analyst) workflow.add_node("Risky Analyst", risky_analyst)
@ -153,22 +156,21 @@ class GraphSetup:
workflow.add_edge(current_clear, "Bull Researcher") workflow.add_edge(current_clear, "Bull Researcher")
# Add remaining edges # Add remaining edges
# Bull/Bear Researchers now go to Fact Checker instead of conditional edge
workflow.add_edge("Bull Researcher", "Fact Checker")
workflow.add_edge("Bear Researcher", "Fact Checker")
# Fact Checker decides where to go next
workflow.add_conditional_edges( workflow.add_conditional_edges(
"Bull Researcher", "Fact Checker",
self.conditional_logic.should_continue_debate, self.conditional_logic.should_continue_debate,
{ {
"Bull Researcher": "Bull Researcher",
"Bear Researcher": "Bear Researcher", "Bear Researcher": "Bear Researcher",
"Research Manager": "Research Manager", "Research Manager": "Research Manager",
}, },
) )
workflow.add_conditional_edges(
"Bear Researcher",
self.conditional_logic.should_continue_debate,
{
"Bull Researcher": "Bull Researcher",
"Research Manager": "Research Manager",
},
)
workflow.add_edge("Research Manager", "Trader") workflow.add_edge("Research Manager", "Trader")
workflow.add_edge("Trader", "Risky Analyst") workflow.add_edge("Trader", "Risky Analyst")
workflow.add_conditional_edges( workflow.add_conditional_edges(