diff --git a/tradingagents/dataflows/openai.py b/tradingagents/dataflows/openai.py index d287efbf..8e52a948 100644 --- a/tradingagents/dataflows/openai.py +++ b/tradingagents/dataflows/openai.py @@ -113,18 +113,18 @@ def get_batch_stock_news_openai( class PortfolioUpdate(BaseModel): items: List[TickerNews] - from tqdm import tqdm - client = _get_openai_client() results = {} + total_batches = (len(tickers) + batch_size - 1) // batch_size # Process in batches to avoid output token limits - with tqdm(total=len(tickers), desc="📰 OpenAI batch news", unit="ticker") as pbar: - for i in range(0, len(tickers), batch_size): - batch = tickers[i : i + batch_size] + for i in range(0, len(tickers), batch_size): + batch = tickers[i : i + batch_size] + batch_num = i // batch_size + 1 + logger.info(f"📰 OpenAI news batch {batch_num}/{total_batches}: {batch}") - # Request comprehensive news summaries for better ranker LLM context - prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}. + # Request comprehensive news summaries for better ranker LLM context + prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}. Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news. @@ -135,32 +135,29 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering: - Market reaction or implications - Any forward-looking statements or guidance""" - try: - completion = client.responses.parse( - model="gpt-5-nano", - tools=[{"type": "web_search"}], - input=prompt, - text_format=PortfolioUpdate, - ) + try: + completion = client.responses.parse( + model="gpt-5-nano", + tools=[{"type": "web_search"}], + input=prompt, + text_format=PortfolioUpdate, + ) - # Extract structured output - if completion.output_parsed: - for item in completion.output_parsed.items: - results[item.ticker.upper()] = item.news_summary - else: - # Fallback if parsing failed - logger.warning(f"Structured parsing returned None for batch: {batch}") - for ticker in batch: - results[ticker.upper()] = "" - - except Exception as e: - logger.error(f"Error fetching batch news for {batch}: {e}") - # On error, set empty string for all tickers in batch + # Extract structured output + if completion.output_parsed: + for item in completion.output_parsed.items: + results[item.ticker.upper()] = item.news_summary + else: + # Fallback if parsing failed + logger.warning(f"Structured parsing returned None for batch: {batch}") for ticker in batch: results[ticker.upper()] = "" - # Update progress bar - pbar.update(len(batch)) + except Exception as e: + logger.error(f"Error fetching batch news for {batch}: {e}") + # On error, set empty string for all tickers in batch + for ticker in batch: + results[ticker.upper()] = "" return results @@ -218,15 +215,16 @@ def get_batch_stock_news_google( ).with_structured_output(PortfolioUpdate, method="json_schema") results = {} - from tqdm import tqdm + total_batches = (len(tickers) + batch_size - 1) // batch_size # Process in batches - with tqdm(total=len(tickers), desc="📰 Google batch news", unit="ticker") as pbar: - for i in range(0, len(tickers), batch_size): - batch = tickers[i : i + batch_size] + for i in range(0, len(tickers), batch_size): + batch = tickers[i : i + batch_size] + batch_num = i // batch_size + 1 + logger.info(f"📰 Google news batch {batch_num}/{total_batches}: {batch}") - # Request comprehensive news summaries for better ranker LLM context - prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}. + # Request comprehensive news summaries for better ranker LLM context + prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}. Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news. @@ -237,33 +235,30 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering: - Market reaction or implications - Any forward-looking statements or guidance""" - try: - # Step 1: Perform Google search (grounded response) - raw_news = search_llm.invoke(prompt) + try: + # Step 1: Perform Google search (grounded response) + raw_news = search_llm.invoke(prompt) - # Step 2: Structure the grounded results - structured_result = structured_llm.invoke( - f"Using this verified news data: {raw_news.content}\n\n" - f"Format the news for these tickers into the JSON structure: {batch}\n" - f"Include all tickers from the list, even if no news was found." - ) + # Step 2: Structure the grounded results + structured_result = structured_llm.invoke( + f"Using this verified news data: {raw_news.content}\n\n" + f"Format the news for these tickers into the JSON structure: {batch}\n" + f"Include all tickers from the list, even if no news was found." + ) - # Extract results - if structured_result and hasattr(structured_result, "items"): - for item in structured_result.items: - results[item.ticker.upper()] = item.news_summary - else: - logger.warning(f"Structured output invalid for batch: {batch}") - for ticker in batch: - results[ticker.upper()] = "" - - except Exception as e: - logger.error(f"Error fetching Google batch news for {batch}: {e}") - # On error, set empty string for all tickers in batch + # Extract results + if structured_result and hasattr(structured_result, "items"): + for item in structured_result.items: + results[item.ticker.upper()] = item.news_summary + else: + logger.warning(f"Structured output invalid for batch: {batch}") for ticker in batch: results[ticker.upper()] = "" - # Update progress bar - pbar.update(len(batch)) + except Exception as e: + logger.error(f"Error fetching Google batch news for {batch}: {e}") + # On error, set empty string for all tickers in batch + for ticker in batch: + results[ticker.upper()] = "" return results diff --git a/tradingagents/dataflows/reddit_api.py b/tradingagents/dataflows/reddit_api.py index bdd2ce9c..73fa8278 100644 --- a/tradingagents/dataflows/reddit_api.py +++ b/tradingagents/dataflows/reddit_api.py @@ -342,7 +342,8 @@ def get_reddit_undiscovered_dd( top_n: Annotated[int, "Number of top DD posts to return"] = 10, num_comments: Annotated[int, "Number of top comments to include"] = 10, llm_evaluator=None, # Will be passed from discovery graph -) -> str: + as_list: bool = False, +) -> str | list: """ Find high-quality undiscovered DD using LLM evaluation. @@ -383,18 +384,19 @@ def get_reddit_undiscovered_dd( if not submission.selftext or len(submission.selftext) < 200: continue - # Get top comments for community validation - submission.comment_sort = "top" - submission.comments.replace_more(limit=0) top_comments = [] - for comment in submission.comments[:num_comments]: - if hasattr(comment, "body") and hasattr(comment, "score"): - top_comments.append( - { - "body": comment.body[:1000], # Include more of each comment - "score": comment.score, - } - ) + if llm_evaluator: + # Get top comments for community validation + submission.comment_sort = "top" + submission.comments.replace_more(limit=0) + for comment in submission.comments[:num_comments]: + if hasattr(comment, "body") and hasattr(comment, "score"): + top_comments.append( + { + "body": comment.body[:1000], # Include more of each comment + "score": comment.score, + } + ) candidate_posts.append( { @@ -517,27 +519,11 @@ Extract all stock ticker symbols mentioned in the post or comments.""" return post - # Parallel evaluation with progress tracking - try: - from tqdm import tqdm - - use_tqdm = True - except ImportError: - use_tqdm = False - + # Parallel evaluation + logger.info(f"Scanning {len(candidate_posts)} Reddit posts with LLM...") with ThreadPoolExecutor(max_workers=10) as executor: futures = [executor.submit(evaluate_post, post) for post in candidate_posts] - - if use_tqdm: - # With progress bar - evaluated = [] - for future in tqdm( - as_completed(futures), total=len(futures), desc=" Evaluating posts" - ): - evaluated.append(future.result()) - else: - # Without progress bar (fallback) - evaluated = [f.result() for f in as_completed(futures)] + evaluated = [f.result() for f in as_completed(futures)] # Filter quality threshold (55+ = decent DD) quality_dd = [p for p in evaluated if p["quality_score"] >= 55] @@ -559,6 +545,18 @@ Extract all stock ticker symbols mentioned in the post or comments.""" candidate_posts.sort(key=lambda x: x["full_length"] + (x["score"] * 10), reverse=True) top_dd = candidate_posts[:top_n] + if as_list: + if not llm_evaluator: + import re + + ticker_pattern = r"\$([A-Z]{2,5})\b|^([A-Z]{2,5})\s" + for post in top_dd: + matches = re.findall(ticker_pattern, post["title"] + " " + post["text"]) + tickers = list(set([t[0] or t[1] for t in matches if t[0] or t[1]])) + post["ticker"] = tickers[0] if tickers else "" + post["quality_score"] = 75 # default to Medium priority + return top_dd + if not top_dd: return f"# Undiscovered DD\n\nNo high-quality DD found (scanned {len(candidate_posts)} posts)."