fix(filter): replace tqdm with logger in batch news functions to fix I/O error
tqdm writes to stderr immediately on __enter__, before any loop iteration.
In Streamlit's thread/subprocess context stderr can be a closed pipe, causing
'I/O operation on closed file' which _run_call catches and returns {} — so
the entire news enrichment step was silently skipped every run.
Replaced tqdm progress bars with logger.info() calls in:
- get_batch_stock_news_google() in openai.py
- get_batch_stock_news_openai() in openai.py
- Reddit DD parallel evaluation in reddit_api.py
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
21b33c6709
commit
61b731ac28
|
|
@ -113,18 +113,18 @@ def get_batch_stock_news_openai(
|
|||
class PortfolioUpdate(BaseModel):
|
||||
items: List[TickerNews]
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
client = _get_openai_client()
|
||||
results = {}
|
||||
total_batches = (len(tickers) + batch_size - 1) // batch_size
|
||||
|
||||
# Process in batches to avoid output token limits
|
||||
with tqdm(total=len(tickers), desc="📰 OpenAI batch news", unit="ticker") as pbar:
|
||||
for i in range(0, len(tickers), batch_size):
|
||||
batch = tickers[i : i + batch_size]
|
||||
for i in range(0, len(tickers), batch_size):
|
||||
batch = tickers[i : i + batch_size]
|
||||
batch_num = i // batch_size + 1
|
||||
logger.info(f"📰 OpenAI news batch {batch_num}/{total_batches}: {batch}")
|
||||
|
||||
# Request comprehensive news summaries for better ranker LLM context
|
||||
prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
|
||||
# Request comprehensive news summaries for better ranker LLM context
|
||||
prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
|
||||
|
||||
Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news.
|
||||
|
||||
|
|
@ -135,32 +135,29 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering:
|
|||
- Market reaction or implications
|
||||
- Any forward-looking statements or guidance"""
|
||||
|
||||
try:
|
||||
completion = client.responses.parse(
|
||||
model="gpt-5-nano",
|
||||
tools=[{"type": "web_search"}],
|
||||
input=prompt,
|
||||
text_format=PortfolioUpdate,
|
||||
)
|
||||
try:
|
||||
completion = client.responses.parse(
|
||||
model="gpt-5-nano",
|
||||
tools=[{"type": "web_search"}],
|
||||
input=prompt,
|
||||
text_format=PortfolioUpdate,
|
||||
)
|
||||
|
||||
# Extract structured output
|
||||
if completion.output_parsed:
|
||||
for item in completion.output_parsed.items:
|
||||
results[item.ticker.upper()] = item.news_summary
|
||||
else:
|
||||
# Fallback if parsing failed
|
||||
logger.warning(f"Structured parsing returned None for batch: {batch}")
|
||||
for ticker in batch:
|
||||
results[ticker.upper()] = ""
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching batch news for {batch}: {e}")
|
||||
# On error, set empty string for all tickers in batch
|
||||
# Extract structured output
|
||||
if completion.output_parsed:
|
||||
for item in completion.output_parsed.items:
|
||||
results[item.ticker.upper()] = item.news_summary
|
||||
else:
|
||||
# Fallback if parsing failed
|
||||
logger.warning(f"Structured parsing returned None for batch: {batch}")
|
||||
for ticker in batch:
|
||||
results[ticker.upper()] = ""
|
||||
|
||||
# Update progress bar
|
||||
pbar.update(len(batch))
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching batch news for {batch}: {e}")
|
||||
# On error, set empty string for all tickers in batch
|
||||
for ticker in batch:
|
||||
results[ticker.upper()] = ""
|
||||
|
||||
return results
|
||||
|
||||
|
|
@ -218,15 +215,16 @@ def get_batch_stock_news_google(
|
|||
).with_structured_output(PortfolioUpdate, method="json_schema")
|
||||
results = {}
|
||||
|
||||
from tqdm import tqdm
|
||||
total_batches = (len(tickers) + batch_size - 1) // batch_size
|
||||
|
||||
# Process in batches
|
||||
with tqdm(total=len(tickers), desc="📰 Google batch news", unit="ticker") as pbar:
|
||||
for i in range(0, len(tickers), batch_size):
|
||||
batch = tickers[i : i + batch_size]
|
||||
for i in range(0, len(tickers), batch_size):
|
||||
batch = tickers[i : i + batch_size]
|
||||
batch_num = i // batch_size + 1
|
||||
logger.info(f"📰 Google news batch {batch_num}/{total_batches}: {batch}")
|
||||
|
||||
# Request comprehensive news summaries for better ranker LLM context
|
||||
prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
|
||||
# Request comprehensive news summaries for better ranker LLM context
|
||||
prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
|
||||
|
||||
Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news.
|
||||
|
||||
|
|
@ -237,33 +235,30 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering:
|
|||
- Market reaction or implications
|
||||
- Any forward-looking statements or guidance"""
|
||||
|
||||
try:
|
||||
# Step 1: Perform Google search (grounded response)
|
||||
raw_news = search_llm.invoke(prompt)
|
||||
try:
|
||||
# Step 1: Perform Google search (grounded response)
|
||||
raw_news = search_llm.invoke(prompt)
|
||||
|
||||
# Step 2: Structure the grounded results
|
||||
structured_result = structured_llm.invoke(
|
||||
f"Using this verified news data: {raw_news.content}\n\n"
|
||||
f"Format the news for these tickers into the JSON structure: {batch}\n"
|
||||
f"Include all tickers from the list, even if no news was found."
|
||||
)
|
||||
# Step 2: Structure the grounded results
|
||||
structured_result = structured_llm.invoke(
|
||||
f"Using this verified news data: {raw_news.content}\n\n"
|
||||
f"Format the news for these tickers into the JSON structure: {batch}\n"
|
||||
f"Include all tickers from the list, even if no news was found."
|
||||
)
|
||||
|
||||
# Extract results
|
||||
if structured_result and hasattr(structured_result, "items"):
|
||||
for item in structured_result.items:
|
||||
results[item.ticker.upper()] = item.news_summary
|
||||
else:
|
||||
logger.warning(f"Structured output invalid for batch: {batch}")
|
||||
for ticker in batch:
|
||||
results[ticker.upper()] = ""
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching Google batch news for {batch}: {e}")
|
||||
# On error, set empty string for all tickers in batch
|
||||
# Extract results
|
||||
if structured_result and hasattr(structured_result, "items"):
|
||||
for item in structured_result.items:
|
||||
results[item.ticker.upper()] = item.news_summary
|
||||
else:
|
||||
logger.warning(f"Structured output invalid for batch: {batch}")
|
||||
for ticker in batch:
|
||||
results[ticker.upper()] = ""
|
||||
|
||||
# Update progress bar
|
||||
pbar.update(len(batch))
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching Google batch news for {batch}: {e}")
|
||||
# On error, set empty string for all tickers in batch
|
||||
for ticker in batch:
|
||||
results[ticker.upper()] = ""
|
||||
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -342,7 +342,8 @@ def get_reddit_undiscovered_dd(
|
|||
top_n: Annotated[int, "Number of top DD posts to return"] = 10,
|
||||
num_comments: Annotated[int, "Number of top comments to include"] = 10,
|
||||
llm_evaluator=None, # Will be passed from discovery graph
|
||||
) -> str:
|
||||
as_list: bool = False,
|
||||
) -> str | list:
|
||||
"""
|
||||
Find high-quality undiscovered DD using LLM evaluation.
|
||||
|
||||
|
|
@ -383,18 +384,19 @@ def get_reddit_undiscovered_dd(
|
|||
if not submission.selftext or len(submission.selftext) < 200:
|
||||
continue
|
||||
|
||||
# Get top comments for community validation
|
||||
submission.comment_sort = "top"
|
||||
submission.comments.replace_more(limit=0)
|
||||
top_comments = []
|
||||
for comment in submission.comments[:num_comments]:
|
||||
if hasattr(comment, "body") and hasattr(comment, "score"):
|
||||
top_comments.append(
|
||||
{
|
||||
"body": comment.body[:1000], # Include more of each comment
|
||||
"score": comment.score,
|
||||
}
|
||||
)
|
||||
if llm_evaluator:
|
||||
# Get top comments for community validation
|
||||
submission.comment_sort = "top"
|
||||
submission.comments.replace_more(limit=0)
|
||||
for comment in submission.comments[:num_comments]:
|
||||
if hasattr(comment, "body") and hasattr(comment, "score"):
|
||||
top_comments.append(
|
||||
{
|
||||
"body": comment.body[:1000], # Include more of each comment
|
||||
"score": comment.score,
|
||||
}
|
||||
)
|
||||
|
||||
candidate_posts.append(
|
||||
{
|
||||
|
|
@ -517,27 +519,11 @@ Extract all stock ticker symbols mentioned in the post or comments."""
|
|||
|
||||
return post
|
||||
|
||||
# Parallel evaluation with progress tracking
|
||||
try:
|
||||
from tqdm import tqdm
|
||||
|
||||
use_tqdm = True
|
||||
except ImportError:
|
||||
use_tqdm = False
|
||||
|
||||
# Parallel evaluation
|
||||
logger.info(f"Scanning {len(candidate_posts)} Reddit posts with LLM...")
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
futures = [executor.submit(evaluate_post, post) for post in candidate_posts]
|
||||
|
||||
if use_tqdm:
|
||||
# With progress bar
|
||||
evaluated = []
|
||||
for future in tqdm(
|
||||
as_completed(futures), total=len(futures), desc=" Evaluating posts"
|
||||
):
|
||||
evaluated.append(future.result())
|
||||
else:
|
||||
# Without progress bar (fallback)
|
||||
evaluated = [f.result() for f in as_completed(futures)]
|
||||
evaluated = [f.result() for f in as_completed(futures)]
|
||||
|
||||
# Filter quality threshold (55+ = decent DD)
|
||||
quality_dd = [p for p in evaluated if p["quality_score"] >= 55]
|
||||
|
|
@ -559,6 +545,18 @@ Extract all stock ticker symbols mentioned in the post or comments."""
|
|||
candidate_posts.sort(key=lambda x: x["full_length"] + (x["score"] * 10), reverse=True)
|
||||
top_dd = candidate_posts[:top_n]
|
||||
|
||||
if as_list:
|
||||
if not llm_evaluator:
|
||||
import re
|
||||
|
||||
ticker_pattern = r"\$([A-Z]{2,5})\b|^([A-Z]{2,5})\s"
|
||||
for post in top_dd:
|
||||
matches = re.findall(ticker_pattern, post["title"] + " " + post["text"])
|
||||
tickers = list(set([t[0] or t[1] for t in matches if t[0] or t[1]]))
|
||||
post["ticker"] = tickers[0] if tickers else ""
|
||||
post["quality_score"] = 75 # default to Medium priority
|
||||
return top_dd
|
||||
|
||||
if not top_dd:
|
||||
return f"# Undiscovered DD\n\nNo high-quality DD found (scanned {len(candidate_posts)} posts)."
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue