fix(filter): replace tqdm with logger in batch news functions to fix I/O error

tqdm writes to stderr immediately on __enter__, before any loop iteration. In Streamlit's thread/subprocess context, stderr can be a closed pipe, which raises 'I/O operation on closed file'. _run_call catches that error and returns {}, so the entire news enrichment step was silently skipped on every run.

Replaced tqdm progress bars with logger.info() calls in:
- get_batch_stock_news_google() in openai.py
- get_batch_stock_news_openai() in openai.py
- Reddit DD parallel evaluation in reddit_api.py

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-21 13:55:13 -08:00 · 2026-02-21 13:55:13 -08:00 · 61b731ac28
parent 21b33c6709
commit 61b731ac28
2 changed files with 82 additions and 89 deletions
--- a/tradingagents/dataflows/openai.py
+++ b/tradingagents/dataflows/openai.py
@ -113,15 +113,15 @@ def get_batch_stock_news_openai(
    class PortfolioUpdate(BaseModel):
        items: List[TickerNews]
    from tqdm import tqdm
    client = _get_openai_client()
    results = {}
    total_batches = (len(tickers) + batch_size - 1) // batch_size
    # Process in batches to avoid output token limits
    with tqdm(total=len(tickers), desc="📰 OpenAI batch news", unit="ticker") as pbar:
    for i in range(0, len(tickers), batch_size):
        batch = tickers[i : i + batch_size]
        batch_num = i // batch_size + 1
        logger.info(f"📰 OpenAI news batch {batch_num}/{total_batches}: {batch}")
        # Request comprehensive news summaries for better ranker LLM context
        prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
@ -159,9 +159,6 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering:
            for ticker in batch:
                results[ticker.upper()] = ""
            # Update progress bar
            pbar.update(len(batch))
    return results
@ -218,12 +215,13 @@ def get_batch_stock_news_google(
    ).with_structured_output(PortfolioUpdate, method="json_schema")
    results = {}
-    from tqdm import tqdm
+    total_batches = (len(tickers) + batch_size - 1) // batch_size
    # Process in batches
    with tqdm(total=len(tickers), desc="📰 Google batch news", unit="ticker") as pbar:
    for i in range(0, len(tickers), batch_size):
        batch = tickers[i : i + batch_size]
        batch_num = i // batch_size + 1
        logger.info(f"📰 Google news batch {batch_num}/{total_batches}: {batch}")
        # Request comprehensive news summaries for better ranker LLM context
        prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
@ -263,7 +261,4 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering:
            for ticker in batch:
                results[ticker.upper()] = ""
            # Update progress bar
            pbar.update(len(batch))
    return results
--- a/tradingagents/dataflows/reddit_api.py
+++ b/tradingagents/dataflows/reddit_api.py
@ -342,7 +342,8 @@ def get_reddit_undiscovered_dd(
    top_n: Annotated[int, "Number of top DD posts to return"] = 10,
    num_comments: Annotated[int, "Number of top comments to include"] = 10,
    llm_evaluator=None,  # Will be passed from discovery graph
-) -> str:
+    as_list: bool = False,
 ) -> str | list:
    """
    Find high-quality undiscovered DD using LLM evaluation.
@ -383,10 +384,11 @@ def get_reddit_undiscovered_dd(
            if not submission.selftext or len(submission.selftext) < 200:
                continue
            top_comments = []
            if llm_evaluator:
                # Get top comments for community validation
                submission.comment_sort = "top"
                submission.comments.replace_more(limit=0)
            top_comments = []
                for comment in submission.comments[:num_comments]:
                    if hasattr(comment, "body") and hasattr(comment, "score"):
                        top_comments.append(
@ -517,26 +519,10 @@ Extract all stock ticker symbols mentioned in the post or comments."""
                return post
-            # Parallel evaluation with progress tracking
+            # Parallel evaluation
-            try:
+            logger.info(f"Scanning {len(candidate_posts)} Reddit posts with LLM...")
                from tqdm import tqdm
                use_tqdm = True
            except ImportError:
                use_tqdm = False
            with ThreadPoolExecutor(max_workers=10) as executor:
                futures = [executor.submit(evaluate_post, post) for post in candidate_posts]
                if use_tqdm:
                    # With progress bar
                    evaluated = []
                    for future in tqdm(
                        as_completed(futures), total=len(futures), desc="   Evaluating posts"
                    ):
                        evaluated.append(future.result())
                else:
                    # Without progress bar (fallback)
                evaluated = [f.result() for f in as_completed(futures)]
            # Filter quality threshold (55+ = decent DD)
@ -559,6 +545,18 @@ Extract all stock ticker symbols mentioned in the post or comments."""
            candidate_posts.sort(key=lambda x: x["full_length"] + (x["score"] * 10), reverse=True)
            top_dd = candidate_posts[:top_n]
        if as_list:
            if not llm_evaluator:
                import re
                ticker_pattern = r"\$([A-Z]{2,5})\b|^([A-Z]{2,5})\s"
                for post in top_dd:
                    matches = re.findall(ticker_pattern, post["title"] + " " + post["text"])
                    tickers = list(set([t[0] or t[1] for t in matches if t[0] or t[1]]))
                    post["ticker"] = tickers[0] if tickers else ""
                    post["quality_score"] = 75  # default to Medium priority
            return top_dd
        if not top_dd:
            return f"# Undiscovered DD\n\nNo high-quality DD found (scanned {len(candidate_posts)} posts)."