fix(filter): replace tqdm with logger in batch news functions to fix I/O error

tqdm writes to stderr immediately on __enter__, before any loop iteration. In Streamlit's thread/subprocess context stderr can be a closed pipe, causing 'I/O operation on closed file' which _run_call catches and returns {} — so the entire news enrichment step was silently skipped every run. Replaced tqdm progress bars with logger.info() calls in: - get_batch_stock_news_google() in openai.py - get_batch_stock_news_openai() in openai.py - Reddit DD parallel evaluation in reddit_api.py Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-21 13:55:13 -08:00 · 2026-02-21 13:55:13 -08:00 · 61b731ac28
parent 21b33c6709
commit 61b731ac28
2 changed files with 82 additions and 89 deletions
--- a/tradingagents/dataflows/openai.py
+++ b/tradingagents/dataflows/openai.py
@ -113,18 +113,18 @@ def get_batch_stock_news_openai(
    class PortfolioUpdate(BaseModel):
        items: List[TickerNews]

-    from tqdm import tqdm
-
    client = _get_openai_client()
    results = {}
+    total_batches = (len(tickers) + batch_size - 1) // batch_size

    # Process in batches to avoid output token limits
-    with tqdm(total=len(tickers), desc="📰 OpenAI batch news", unit="ticker") as pbar:
-        for i in range(0, len(tickers), batch_size):
-            batch = tickers[i : i + batch_size]
+    for i in range(0, len(tickers), batch_size):
+        batch = tickers[i : i + batch_size]
+        batch_num = i // batch_size + 1
+        logger.info(f"📰 OpenAI news batch {batch_num}/{total_batches}: {batch}")

-            # Request comprehensive news summaries for better ranker LLM context
-            prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
+        # Request comprehensive news summaries for better ranker LLM context
+        prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.

 Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news.

@ -135,32 +135,29 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering:
 - Market reaction or implications
 - Any forward-looking statements or guidance"""

-            try:
-                completion = client.responses.parse(
-                    model="gpt-5-nano",
-                    tools=[{"type": "web_search"}],
-                    input=prompt,
-                    text_format=PortfolioUpdate,
-                )
+        try:
+            completion = client.responses.parse(
+                model="gpt-5-nano",
+                tools=[{"type": "web_search"}],
+                input=prompt,
+                text_format=PortfolioUpdate,
+            )

-                # Extract structured output
-                if completion.output_parsed:
-                    for item in completion.output_parsed.items:
-                        results[item.ticker.upper()] = item.news_summary
-                else:
-                    # Fallback if parsing failed
-                    logger.warning(f"Structured parsing returned None for batch: {batch}")
-                    for ticker in batch:
-                        results[ticker.upper()] = ""
-
-            except Exception as e:
-                logger.error(f"Error fetching batch news for {batch}: {e}")
-                # On error, set empty string for all tickers in batch
+            # Extract structured output
+            if completion.output_parsed:
+                for item in completion.output_parsed.items:
+                    results[item.ticker.upper()] = item.news_summary
+            else:
+                # Fallback if parsing failed
+                logger.warning(f"Structured parsing returned None for batch: {batch}")
                for ticker in batch:
                    results[ticker.upper()] = ""

-            # Update progress bar
-            pbar.update(len(batch))
+        except Exception as e:
+            logger.error(f"Error fetching batch news for {batch}: {e}")
+            # On error, set empty string for all tickers in batch
+            for ticker in batch:
+                results[ticker.upper()] = ""

    return results

@ -218,15 +215,16 @@ def get_batch_stock_news_google(
    ).with_structured_output(PortfolioUpdate, method="json_schema")
    results = {}

-    from tqdm import tqdm
+    total_batches = (len(tickers) + batch_size - 1) // batch_size

    # Process in batches
-    with tqdm(total=len(tickers), desc="📰 Google batch news", unit="ticker") as pbar:
-        for i in range(0, len(tickers), batch_size):
-            batch = tickers[i : i + batch_size]
+    for i in range(0, len(tickers), batch_size):
+        batch = tickers[i : i + batch_size]
+        batch_num = i // batch_size + 1
+        logger.info(f"📰 Google news batch {batch_num}/{total_batches}: {batch}")

-            # Request comprehensive news summaries for better ranker LLM context
-            prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
+        # Request comprehensive news summaries for better ranker LLM context
+        prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.

 Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news.

@ -237,33 +235,30 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering:
 - Market reaction or implications
 - Any forward-looking statements or guidance"""

-            try:
-                # Step 1: Perform Google search (grounded response)
-                raw_news = search_llm.invoke(prompt)
+        try:
+            # Step 1: Perform Google search (grounded response)
+            raw_news = search_llm.invoke(prompt)

-                # Step 2: Structure the grounded results
-                structured_result = structured_llm.invoke(
-                    f"Using this verified news data: {raw_news.content}\n\n"
-                    f"Format the news for these tickers into the JSON structure: {batch}\n"
-                    f"Include all tickers from the list, even if no news was found."
-                )
+            # Step 2: Structure the grounded results
+            structured_result = structured_llm.invoke(
+                f"Using this verified news data: {raw_news.content}\n\n"
+                f"Format the news for these tickers into the JSON structure: {batch}\n"
+                f"Include all tickers from the list, even if no news was found."
+            )

-                # Extract results
-                if structured_result and hasattr(structured_result, "items"):
-                    for item in structured_result.items:
-                        results[item.ticker.upper()] = item.news_summary
-                else:
-                    logger.warning(f"Structured output invalid for batch: {batch}")
-                    for ticker in batch:
-                        results[ticker.upper()] = ""
-
-            except Exception as e:
-                logger.error(f"Error fetching Google batch news for {batch}: {e}")
-                # On error, set empty string for all tickers in batch
+            # Extract results
+            if structured_result and hasattr(structured_result, "items"):
+                for item in structured_result.items:
+                    results[item.ticker.upper()] = item.news_summary
+            else:
+                logger.warning(f"Structured output invalid for batch: {batch}")
                for ticker in batch:
                    results[ticker.upper()] = ""

-            # Update progress bar
-            pbar.update(len(batch))
+        except Exception as e:
+            logger.error(f"Error fetching Google batch news for {batch}: {e}")
+            # On error, set empty string for all tickers in batch
+            for ticker in batch:
+                results[ticker.upper()] = ""

    return results
--- a/tradingagents/dataflows/reddit_api.py
+++ b/tradingagents/dataflows/reddit_api.py
@ -342,7 +342,8 @@ def get_reddit_undiscovered_dd(
    top_n: Annotated[int, "Number of top DD posts to return"] = 10,
    num_comments: Annotated[int, "Number of top comments to include"] = 10,
    llm_evaluator=None,  # Will be passed from discovery graph
-) -> str:
+    as_list: bool = False,
+) -> str | list:
    """
    Find high-quality undiscovered DD using LLM evaluation.

@ -383,18 +384,19 @@ def get_reddit_undiscovered_dd(
            if not submission.selftext or len(submission.selftext) < 200:
                continue

-            # Get top comments for community validation
-            submission.comment_sort = "top"
-            submission.comments.replace_more(limit=0)
            top_comments = []
-            for comment in submission.comments[:num_comments]:
-                if hasattr(comment, "body") and hasattr(comment, "score"):
-                    top_comments.append(
-                        {
-                            "body": comment.body[:1000],  # Include more of each comment
-                            "score": comment.score,
-                        }
-                    )
+            if llm_evaluator:
+                # Get top comments for community validation
+                submission.comment_sort = "top"
+                submission.comments.replace_more(limit=0)
+                for comment in submission.comments[:num_comments]:
+                    if hasattr(comment, "body") and hasattr(comment, "score"):
+                        top_comments.append(
+                            {
+                                "body": comment.body[:1000],  # Include more of each comment
+                                "score": comment.score,
+                            }
+                        )

            candidate_posts.append(
                {
@ -517,27 +519,11 @@ Extract all stock ticker symbols mentioned in the post or comments."""

                return post

-            # Parallel evaluation with progress tracking
-            try:
-                from tqdm import tqdm
-
-                use_tqdm = True
-            except ImportError:
-                use_tqdm = False
-
+            # Parallel evaluation
+            logger.info(f"Scanning {len(candidate_posts)} Reddit posts with LLM...")
            with ThreadPoolExecutor(max_workers=10) as executor:
                futures = [executor.submit(evaluate_post, post) for post in candidate_posts]
-
-                if use_tqdm:
-                    # With progress bar
-                    evaluated = []
-                    for future in tqdm(
-                        as_completed(futures), total=len(futures), desc="   Evaluating posts"
-                    ):
-                        evaluated.append(future.result())
-                else:
-                    # Without progress bar (fallback)
-                    evaluated = [f.result() for f in as_completed(futures)]
+                evaluated = [f.result() for f in as_completed(futures)]

            # Filter quality threshold (55+ = decent DD)
            quality_dd = [p for p in evaluated if p["quality_score"] >= 55]
@ -559,6 +545,18 @@ Extract all stock ticker symbols mentioned in the post or comments."""
            candidate_posts.sort(key=lambda x: x["full_length"] + (x["score"] * 10), reverse=True)
            top_dd = candidate_posts[:top_n]

+        if as_list:
+            if not llm_evaluator:
+                import re
+
+                ticker_pattern = r"\$([A-Z]{2,5})\b|^([A-Z]{2,5})\s"
+                for post in top_dd:
+                    matches = re.findall(ticker_pattern, post["title"] + " " + post["text"])
+                    tickers = list(set([t[0] or t[1] for t in matches if t[0] or t[1]]))
+                    post["ticker"] = tickers[0] if tickers else ""
+                    post["quality_score"] = 75  # default to Medium priority
+            return top_dd
+
        if not top_dd:
            return f"# Undiscovered DD\n\nNo high-quality DD found (scanned {len(candidate_posts)} posts)."