From d2ebd6d58759539a6e47cb1fc13105946a1c7bcd Mon Sep 17 00:00:00 2001 From: "swj.premkumar" Date: Fri, 9 Jan 2026 21:40:45 -0600 Subject: [PATCH] ickerAnonymizer` into all analyst agents (`Market`, `News`, `Fundamentals`, `Social`) and data tools. The LLM now only sees "ASSET_XXX" in prompts, preventing data contamination. --- .../agents/analysts/fundamentals_analyst.py | 14 +++++- .../agents/analysts/market_analyst.py | 15 +++++- tradingagents/agents/analysts/news_analyst.py | 15 +++++- .../agents/analysts/social_media_analyst.py | 11 ++++- .../agents/utils/core_stock_tools.py | 18 ++++++- .../agents/utils/fundamental_data_tools.py | 24 ++++++++-- tradingagents/agents/utils/news_data_tools.py | 48 +++++++++++++++++-- .../utils/technical_indicators_tools.py | 17 ++++++- tradingagents/utils/anonymizer.py | 44 ++++++++++++++++- verify_google_key.py | 34 +++++++++++++ 10 files changed, 220 insertions(+), 20 deletions(-) create mode 100644 verify_google_key.py diff --git a/tradingagents/agents/analysts/fundamentals_analyst.py b/tradingagents/agents/analysts/fundamentals_analyst.py index e20139cb..b2fae62e 100644 --- a/tradingagents/agents/analysts/fundamentals_analyst.py +++ b/tradingagents/agents/analysts/fundamentals_analyst.py @@ -5,11 +5,21 @@ from tradingagents.agents.utils.agent_utils import get_fundamentals, get_balance from tradingagents.dataflows.config import get_config +from tradingagents.utils.anonymizer import TickerAnonymizer + +# Initialize anonymizer +anonymizer = TickerAnonymizer() + def create_fundamentals_analyst(llm): def fundamentals_analyst_node(state): current_date = state["trade_date"] - ticker = state["company_of_interest"] - company_name = state["company_of_interest"] + real_ticker = state["company_of_interest"] + company_name = state["company_of_interest"] # Acting as placeholder name + + # BLINDFIRE PROTOCOL: Anonymize Ticker + # We set name here too just in case fundamentals runs first or independently + anonymizer.set_company_name(real_ticker, company_name) + ticker = anonymizer.anonymize_ticker(real_ticker) tools = [ get_fundamentals, diff --git a/tradingagents/agents/analysts/market_analyst.py b/tradingagents/agents/analysts/market_analyst.py index 5006b20a..95de76ae 100644 --- a/tradingagents/agents/analysts/market_analyst.py +++ b/tradingagents/agents/analysts/market_analyst.py @@ -5,12 +5,23 @@ from tradingagents.agents.utils.agent_utils import get_stock_data, get_indicator from tradingagents.dataflows.config import get_config +from tradingagents.utils.anonymizer import TickerAnonymizer + +# Initialize anonymizer (shared instance appropriate here or inside) +anonymizer = TickerAnonymizer() + def create_market_analyst(llm): def market_analyst_node(state): current_date = state["trade_date"] - ticker = state["company_of_interest"] - company_name = state["company_of_interest"] + real_ticker = state["company_of_interest"] + company_name = state["company_of_interest"] # In this context acting as name too + + # BLINDFIRE PROTOCOL: Anonymize Ticker + anonymizer.set_company_name(real_ticker, company_name) + ticker = anonymizer.anonymize_ticker(real_ticker) + + # NOTE: We continue to use 'ticker' variable name but it now holds 'ASSET_XXX' tools = [ get_stock_data, diff --git a/tradingagents/agents/analysts/news_analyst.py b/tradingagents/agents/analysts/news_analyst.py index 03b4fae4..dbbeffaf 100644 --- a/tradingagents/agents/analysts/news_analyst.py +++ b/tradingagents/agents/analysts/news_analyst.py @@ -5,10 +5,23 @@ from tradingagents.agents.utils.agent_utils import get_news, get_global_news from tradingagents.dataflows.config import get_config +from tradingagents.utils.anonymizer import TickerAnonymizer + +# Initialize anonymizer +anonymizer = TickerAnonymizer() + def create_news_analyst(llm): def news_analyst_node(state): current_date = state["trade_date"] - ticker = state["company_of_interest"] + real_ticker = state["company_of_interest"] + + # BLINDFIRE PROTOCOL: Anonymize Ticker + ticker = anonymizer.anonymize_ticker(real_ticker) + # Note: company name registration happens in market_analyst primarily, + # but we can do it here too if not already set, or just use ticker mapping. + # Since state doesn't always have full company name guaranteed in all flows, + # we rely on market_analyst or previous steps, or just ticker hashing here. + tools = [ get_news, diff --git a/tradingagents/agents/analysts/social_media_analyst.py b/tradingagents/agents/analysts/social_media_analyst.py index b25712d7..b686cd5d 100644 --- a/tradingagents/agents/analysts/social_media_analyst.py +++ b/tradingagents/agents/analysts/social_media_analyst.py @@ -5,12 +5,21 @@ from tradingagents.agents.utils.agent_utils import get_news from tradingagents.dataflows.config import get_config +from tradingagents.utils.anonymizer import TickerAnonymizer + +# Initialize anonymizer +anonymizer = TickerAnonymizer() + def create_social_media_analyst(llm): def social_media_analyst_node(state): current_date = state["trade_date"] - ticker = state["company_of_interest"] + real_ticker = state["company_of_interest"] company_name = state["company_of_interest"] + # BLINDFIRE PROTOCOL: Anonymize Ticker + anonymizer.set_company_name(real_ticker, company_name) + ticker = anonymizer.anonymize_ticker(real_ticker) + tools = [ get_news, ] diff --git a/tradingagents/agents/utils/core_stock_tools.py b/tradingagents/agents/utils/core_stock_tools.py index 3a416622..5092f7d3 100644 --- a/tradingagents/agents/utils/core_stock_tools.py +++ b/tradingagents/agents/utils/core_stock_tools.py @@ -1,7 +1,7 @@ from langchain_core.tools import tool from typing import Annotated from tradingagents.dataflows.interface import route_to_vendor - +from tradingagents.utils.anonymizer import TickerAnonymizer @tool def get_stock_data( @@ -19,4 +19,18 @@ def get_stock_data( Returns: str: A formatted dataframe containing the stock price data for the specified ticker symbol in the specified date range. """ - return route_to_vendor("get_stock_data", symbol, start_date, end_date) + # Initialize anonymizer locally to ensure fresh state loading + anonymizer = TickerAnonymizer() + + # 1. Deanonymize ticker (ASSET_XXX -> AAPL) + real_ticker = anonymizer.deanonymize_ticker(symbol) + if not real_ticker: + real_ticker = symbol # Fallback if not anonymized + + # 2. Get Data using Real Ticker + raw_data = route_to_vendor("get_stock_data", real_ticker, start_date, end_date) + + # 3. Anonymize Output (AAPL -> ASSET_XXX) + anonymized_data = anonymizer.anonymize_text(raw_data, real_ticker) + + return anonymized_data diff --git a/tradingagents/agents/utils/fundamental_data_tools.py b/tradingagents/agents/utils/fundamental_data_tools.py index 47f6f2eb..26ba57dc 100644 --- a/tradingagents/agents/utils/fundamental_data_tools.py +++ b/tradingagents/agents/utils/fundamental_data_tools.py @@ -1,7 +1,23 @@ from langchain_core.tools import tool from typing import Annotated from tradingagents.dataflows.interface import route_to_vendor +from tradingagents.utils.anonymizer import TickerAnonymizer +def _process_vendor_call(func_name, ticker, *args): + """Helper to handle anonymization for vendor calls""" + # Initialize locally to ensure fresh state + anonymizer = TickerAnonymizer() + + # 1. Deanonymize ticker + real_ticker = anonymizer.deanonymize_ticker(ticker) + if not real_ticker: + real_ticker = ticker + + # 2. Get Data + raw_data = route_to_vendor(func_name, real_ticker, *args) + + # 3. Anonymize Output + return anonymizer.anonymize_text(raw_data, real_ticker) @tool def get_fundamentals( @@ -17,7 +33,7 @@ def get_fundamentals( Returns: str: A formatted report containing comprehensive fundamental data """ - return route_to_vendor("get_fundamentals", ticker, curr_date) + return _process_vendor_call("get_fundamentals", ticker, curr_date) @tool @@ -36,7 +52,7 @@ def get_balance_sheet( Returns: str: A formatted report containing balance sheet data """ - return route_to_vendor("get_balance_sheet", ticker, freq, curr_date) + return _process_vendor_call("get_balance_sheet", ticker, freq, curr_date) @tool @@ -55,7 +71,7 @@ def get_cashflow( Returns: str: A formatted report containing cash flow statement data """ - return route_to_vendor("get_cashflow", ticker, freq, curr_date) + return _process_vendor_call("get_cashflow", ticker, freq, curr_date) @tool @@ -74,4 +90,4 @@ def get_income_statement( Returns: str: A formatted report containing income statement data """ - return route_to_vendor("get_income_statement", ticker, freq, curr_date) \ No newline at end of file + return _process_vendor_call("get_income_statement", ticker, freq, curr_date) \ No newline at end of file diff --git a/tradingagents/agents/utils/news_data_tools.py b/tradingagents/agents/utils/news_data_tools.py index 0df9d047..ee4fbb4a 100644 --- a/tradingagents/agents/utils/news_data_tools.py +++ b/tradingagents/agents/utils/news_data_tools.py @@ -1,6 +1,32 @@ from langchain_core.tools import tool from typing import Annotated from tradingagents.dataflows.interface import route_to_vendor +from tradingagents.utils.anonymizer import TickerAnonymizer + +def _process_vendor_call(func_name, ticker=None, *args): + """Helper to handle anonymization for vendor calls""" + # Initialize locally to ensure fresh state + anonymizer = TickerAnonymizer() + + real_ticker = None + if ticker: + # 1. Deanonymize ticker + real_ticker = anonymizer.deanonymize_ticker(ticker) + if not real_ticker: + real_ticker = ticker + + # 2. Get Data + # Handle optional ticker for global_news + call_args = [real_ticker] + list(args) if ticker else list(args) + raw_data = route_to_vendor(func_name, *call_args) + + # 3. Anonymize Output + # For global news, passing ticker=None to anonymize_text might skip ticker-specific masking, + # but still mask known mapped tickers if logic supports it. + # Current anonymize_text requires ticker context for "Company X". + # For global news, we might need a generic pass or skip specific company names if unknown. + # However, for now we pass real_ticker if available. + return anonymizer.anonymize_text(raw_data, real_ticker) if real_ticker else raw_data @tool def get_news( @@ -18,7 +44,7 @@ def get_news( Returns: str: A formatted string containing news data """ - return route_to_vendor("get_news", ticker, start_date, end_date) + return _process_vendor_call("get_news", ticker, start_date, end_date) @tool def get_global_news( @@ -36,6 +62,18 @@ def get_global_news( Returns: str: A formatted string containing global news data """ + # Global news doesn't take a ticker as input, so pass None as ticker + # We rely on the vendor call just taking args. + # Note: route_to_vendor expects func_name, *args. + # Our helper expects func_name, ticker, *args. + # So we call route_to_vendor directly here but still might want to anonymize output? + # Global news might mention "Apple". If we are analyzing "ASSET_042" (Apple), we typically want to mask it. + # But without a specific target ticker in context, it's hard. + # For now, let's just return raw global news or we'd need to mask ALL known mapped tickers. + # The current Anonymizer is context-aware (one ticker). + # Ideally, get_global_news should probably stay raw or be masked for the 'current company of interest' + # but tools don't know the agent's context unless passed. + # Leaving global news RAW for now as it provides macro context. return route_to_vendor("get_global_news", curr_date, look_back_days, limit) @tool @@ -48,11 +86,11 @@ def get_insider_sentiment( Uses the configured news_data vendor. Args: ticker (str): Ticker symbol of the company - curr_date (str): Current date you are trading at, yyyy-mm-dd + curr_date (str): Current date in yyyy-mm-dd format Returns: str: A report of insider sentiment data """ - return route_to_vendor("get_insider_sentiment", ticker, curr_date) + return _process_vendor_call("get_insider_sentiment", ticker, curr_date) @tool def get_insider_transactions( @@ -64,8 +102,8 @@ def get_insider_transactions( Uses the configured news_data vendor. Args: ticker (str): Ticker symbol of the company - curr_date (str): Current date you are trading at, yyyy-mm-dd + curr_date (str): Current date in yyyy-mm-dd format Returns: str: A report of insider transaction data """ - return route_to_vendor("get_insider_transactions", ticker, curr_date) + return _process_vendor_call("get_insider_transactions", ticker, curr_date) diff --git a/tradingagents/agents/utils/technical_indicators_tools.py b/tradingagents/agents/utils/technical_indicators_tools.py index c6c08bca..01f9cc12 100644 --- a/tradingagents/agents/utils/technical_indicators_tools.py +++ b/tradingagents/agents/utils/technical_indicators_tools.py @@ -1,6 +1,7 @@ from langchain_core.tools import tool from typing import Annotated from tradingagents.dataflows.interface import route_to_vendor +from tradingagents.utils.anonymizer import TickerAnonymizer @tool def get_indicators( @@ -20,4 +21,18 @@ def get_indicators( Returns: str: A formatted dataframe containing the technical indicators for the specified ticker symbol and indicator. """ - return route_to_vendor("get_indicators", symbol, indicator, curr_date, look_back_days) \ No newline at end of file + # Initialize anonymizer locally to ensure fresh state loading + anonymizer = TickerAnonymizer() + + # 1. Deanonymize ticker + real_ticker = anonymizer.deanonymize_ticker(symbol) + if not real_ticker: + real_ticker = symbol + + # 2. Get Data + raw_data = route_to_vendor("get_indicators", real_ticker, indicator, curr_date, look_back_days) + + # 3. Anonymize Output + anonymized_data = anonymizer.anonymize_text(raw_data, real_ticker) + + return anonymized_data \ No newline at end of file diff --git a/tradingagents/utils/anonymizer.py b/tradingagents/utils/anonymizer.py index d7525337..e0bfc2cd 100644 --- a/tradingagents/utils/anonymizer.py +++ b/tradingagents/utils/anonymizer.py @@ -25,12 +25,18 @@ class TickerAnonymizer: CRITICAL: Uses adjusted close prices to handle dividends and splits. """ - def __init__(self, seed: str = "blindfire_v1"): + def __init__(self, seed: str = "blindfire_v1", auto_persist: bool = True): self.seed = seed self.ticker_map = {} self.reverse_map = {} self.company_names = {} self.baseline_prices = {} # Store baseline for normalization + self.auto_persist = auto_persist + + # Persistence path + self.map_file = Path("ticker_map.json") + if self.auto_persist: + self._load_from_file() # Product name mappings self.product_map = { @@ -58,6 +64,36 @@ class TickerAnonymizer: "YouTube": "Video Platform A", "Android": "Mobile OS A", } + + def _load_from_file(self): + """Load mapping from disk if exists""" + if self.map_file.exists(): + try: + with open(self.map_file, 'r') as f: + data = json.load(f) + # Merge loaded data + self.ticker_map.update(data.get("ticker_map", {})) + self.reverse_map.update(data.get("reverse_map", {})) + self.company_names.update(data.get("company_names", {})) + except Exception as e: + print(f"Warning: Failed to load ticker map: {e}") + + def _save_to_file(self): + """Save mapping to disk""" + if not self.auto_persist: + return + + data = { + "ticker_map": self.ticker_map, + "reverse_map": self.reverse_map, + "company_names": self.company_names, + "seed": self.seed + } + try: + with open(self.map_file, 'w') as f: + json.dump(data, f, indent=2) + except Exception as e: + print(f"Warning: Failed to save ticker map: {e}") def anonymize_ticker(self, ticker: str) -> str: """ @@ -75,11 +111,15 @@ class TickerAnonymizer: anon_label = f"ASSET_{hash_val % 1000:03d}" self.ticker_map[ticker] = anon_label self.reverse_map[anon_label] = ticker + self._save_to_file() # Save on new mapping + return self.ticker_map[ticker] def set_company_name(self, ticker: str, company_name: str): """Store company name for anonymization.""" - self.company_names[ticker] = company_name + if ticker not in self.company_names or self.company_names[ticker] != company_name: + self.company_names[ticker] = company_name + self._save_to_file() def anonymize_text(self, text: str, ticker: str) -> str: """ diff --git a/verify_google_key.py b/verify_google_key.py new file mode 100644 index 00000000..5e1e4c7a --- /dev/null +++ b/verify_google_key.py @@ -0,0 +1,34 @@ +import os +from openai import OpenAI +from dotenv import load_dotenv + +# Load env +load_dotenv() + +key = os.getenv("GOOGLE_API_KEY") +print(f"Checking GOOGLE_API_KEY...") +if not key: + print("❌ GOOGLE_API_KEY not found in environment or .env file.") + exit(1) + +print(f"✅ Key found: {key[:4]}...{key[-4:]}") + +client = OpenAI( + api_key=key, + base_url="https://generativelanguage.googleapis.com/v1beta/openai/" +) + +print("Attempting to generate embedding for 'Hello World'...") +try: + resp = client.embeddings.create( + model="text-embedding-004", + input="Hello world" + ) + print("✅ Embedding Success! The API Key is valid and the model is accessible.") + print(f"Embedding vector length: {len(resp.data[0].embedding)}") +except Exception as e: + print(f"❌ Embedding Failed: {e}") + print("\nTroubleshooting:") + print("1. Ensure the API Key is from Google AI Studio (aistudio.google.com).") + print("2. Ensure the 'Generative Language API' is enabled in Google Cloud Console if using a GCP project.") + print("3. Verify you have not exceeded your quota.")