fix(filter): replace tqdm with logger in batch news functions to fix I/O error
tqdm writes to stderr immediately on __enter__, before any loop iteration.
In Streamlit's thread/subprocess context, stderr can be a closed pipe, causing
an 'I/O operation on closed file' error that _run_call catches and converts to {} —
so the entire news enrichment step was silently skipped on every run.
Replaced tqdm progress bars with logger.info() calls in:
- get_batch_stock_news_google() in openai.py
- get_batch_stock_news_openai() in openai.py
- Reddit DD parallel evaluation in reddit_api.py
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
21b33c6709
commit
61b731ac28
|
|
@ -113,18 +113,18 @@ def get_batch_stock_news_openai(
|
||||||
class PortfolioUpdate(BaseModel):
|
class PortfolioUpdate(BaseModel):
|
||||||
items: List[TickerNews]
|
items: List[TickerNews]
|
||||||
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
client = _get_openai_client()
|
client = _get_openai_client()
|
||||||
results = {}
|
results = {}
|
||||||
|
total_batches = (len(tickers) + batch_size - 1) // batch_size
|
||||||
|
|
||||||
# Process in batches to avoid output token limits
|
# Process in batches to avoid output token limits
|
||||||
with tqdm(total=len(tickers), desc="📰 OpenAI batch news", unit="ticker") as pbar:
|
for i in range(0, len(tickers), batch_size):
|
||||||
for i in range(0, len(tickers), batch_size):
|
batch = tickers[i : i + batch_size]
|
||||||
batch = tickers[i : i + batch_size]
|
batch_num = i // batch_size + 1
|
||||||
|
logger.info(f"📰 OpenAI news batch {batch_num}/{total_batches}: {batch}")
|
||||||
|
|
||||||
# Request comprehensive news summaries for better ranker LLM context
|
# Request comprehensive news summaries for better ranker LLM context
|
||||||
prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
|
prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
|
||||||
|
|
||||||
Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news.
|
Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news.
|
||||||
|
|
||||||
|
|
@ -135,32 +135,29 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering:
|
||||||
- Market reaction or implications
|
- Market reaction or implications
|
||||||
- Any forward-looking statements or guidance"""
|
- Any forward-looking statements or guidance"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
completion = client.responses.parse(
|
completion = client.responses.parse(
|
||||||
model="gpt-5-nano",
|
model="gpt-5-nano",
|
||||||
tools=[{"type": "web_search"}],
|
tools=[{"type": "web_search"}],
|
||||||
input=prompt,
|
input=prompt,
|
||||||
text_format=PortfolioUpdate,
|
text_format=PortfolioUpdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract structured output
|
# Extract structured output
|
||||||
if completion.output_parsed:
|
if completion.output_parsed:
|
||||||
for item in completion.output_parsed.items:
|
for item in completion.output_parsed.items:
|
||||||
results[item.ticker.upper()] = item.news_summary
|
results[item.ticker.upper()] = item.news_summary
|
||||||
else:
|
else:
|
||||||
# Fallback if parsing failed
|
# Fallback if parsing failed
|
||||||
logger.warning(f"Structured parsing returned None for batch: {batch}")
|
logger.warning(f"Structured parsing returned None for batch: {batch}")
|
||||||
for ticker in batch:
|
|
||||||
results[ticker.upper()] = ""
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error fetching batch news for {batch}: {e}")
|
|
||||||
# On error, set empty string for all tickers in batch
|
|
||||||
for ticker in batch:
|
for ticker in batch:
|
||||||
results[ticker.upper()] = ""
|
results[ticker.upper()] = ""
|
||||||
|
|
||||||
# Update progress bar
|
except Exception as e:
|
||||||
pbar.update(len(batch))
|
logger.error(f"Error fetching batch news for {batch}: {e}")
|
||||||
|
# On error, set empty string for all tickers in batch
|
||||||
|
for ticker in batch:
|
||||||
|
results[ticker.upper()] = ""
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
@ -218,15 +215,16 @@ def get_batch_stock_news_google(
|
||||||
).with_structured_output(PortfolioUpdate, method="json_schema")
|
).with_structured_output(PortfolioUpdate, method="json_schema")
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
from tqdm import tqdm
|
total_batches = (len(tickers) + batch_size - 1) // batch_size
|
||||||
|
|
||||||
# Process in batches
|
# Process in batches
|
||||||
with tqdm(total=len(tickers), desc="📰 Google batch news", unit="ticker") as pbar:
|
for i in range(0, len(tickers), batch_size):
|
||||||
for i in range(0, len(tickers), batch_size):
|
batch = tickers[i : i + batch_size]
|
||||||
batch = tickers[i : i + batch_size]
|
batch_num = i // batch_size + 1
|
||||||
|
logger.info(f"📰 Google news batch {batch_num}/{total_batches}: {batch}")
|
||||||
|
|
||||||
# Request comprehensive news summaries for better ranker LLM context
|
# Request comprehensive news summaries for better ranker LLM context
|
||||||
prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
|
prompt = f"""Find the most significant news stories for {batch} from {start_date} to {end_date}.
|
||||||
|
|
||||||
Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news.
|
Focus on business catalysts: earnings, product launches, partnerships, analyst changes, regulatory news.
|
||||||
|
|
||||||
|
|
@ -237,33 +235,30 @@ For each ticker, provide a comprehensive summary (5-8 sentences) covering:
|
||||||
- Market reaction or implications
|
- Market reaction or implications
|
||||||
- Any forward-looking statements or guidance"""
|
- Any forward-looking statements or guidance"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Step 1: Perform Google search (grounded response)
|
# Step 1: Perform Google search (grounded response)
|
||||||
raw_news = search_llm.invoke(prompt)
|
raw_news = search_llm.invoke(prompt)
|
||||||
|
|
||||||
# Step 2: Structure the grounded results
|
# Step 2: Structure the grounded results
|
||||||
structured_result = structured_llm.invoke(
|
structured_result = structured_llm.invoke(
|
||||||
f"Using this verified news data: {raw_news.content}\n\n"
|
f"Using this verified news data: {raw_news.content}\n\n"
|
||||||
f"Format the news for these tickers into the JSON structure: {batch}\n"
|
f"Format the news for these tickers into the JSON structure: {batch}\n"
|
||||||
f"Include all tickers from the list, even if no news was found."
|
f"Include all tickers from the list, even if no news was found."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract results
|
# Extract results
|
||||||
if structured_result and hasattr(structured_result, "items"):
|
if structured_result and hasattr(structured_result, "items"):
|
||||||
for item in structured_result.items:
|
for item in structured_result.items:
|
||||||
results[item.ticker.upper()] = item.news_summary
|
results[item.ticker.upper()] = item.news_summary
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Structured output invalid for batch: {batch}")
|
logger.warning(f"Structured output invalid for batch: {batch}")
|
||||||
for ticker in batch:
|
|
||||||
results[ticker.upper()] = ""
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error fetching Google batch news for {batch}: {e}")
|
|
||||||
# On error, set empty string for all tickers in batch
|
|
||||||
for ticker in batch:
|
for ticker in batch:
|
||||||
results[ticker.upper()] = ""
|
results[ticker.upper()] = ""
|
||||||
|
|
||||||
# Update progress bar
|
except Exception as e:
|
||||||
pbar.update(len(batch))
|
logger.error(f"Error fetching Google batch news for {batch}: {e}")
|
||||||
|
# On error, set empty string for all tickers in batch
|
||||||
|
for ticker in batch:
|
||||||
|
results[ticker.upper()] = ""
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
||||||
|
|
@ -342,7 +342,8 @@ def get_reddit_undiscovered_dd(
|
||||||
top_n: Annotated[int, "Number of top DD posts to return"] = 10,
|
top_n: Annotated[int, "Number of top DD posts to return"] = 10,
|
||||||
num_comments: Annotated[int, "Number of top comments to include"] = 10,
|
num_comments: Annotated[int, "Number of top comments to include"] = 10,
|
||||||
llm_evaluator=None, # Will be passed from discovery graph
|
llm_evaluator=None, # Will be passed from discovery graph
|
||||||
) -> str:
|
as_list: bool = False,
|
||||||
|
) -> str | list:
|
||||||
"""
|
"""
|
||||||
Find high-quality undiscovered DD using LLM evaluation.
|
Find high-quality undiscovered DD using LLM evaluation.
|
||||||
|
|
||||||
|
|
@ -383,18 +384,19 @@ def get_reddit_undiscovered_dd(
|
||||||
if not submission.selftext or len(submission.selftext) < 200:
|
if not submission.selftext or len(submission.selftext) < 200:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Get top comments for community validation
|
|
||||||
submission.comment_sort = "top"
|
|
||||||
submission.comments.replace_more(limit=0)
|
|
||||||
top_comments = []
|
top_comments = []
|
||||||
for comment in submission.comments[:num_comments]:
|
if llm_evaluator:
|
||||||
if hasattr(comment, "body") and hasattr(comment, "score"):
|
# Get top comments for community validation
|
||||||
top_comments.append(
|
submission.comment_sort = "top"
|
||||||
{
|
submission.comments.replace_more(limit=0)
|
||||||
"body": comment.body[:1000], # Include more of each comment
|
for comment in submission.comments[:num_comments]:
|
||||||
"score": comment.score,
|
if hasattr(comment, "body") and hasattr(comment, "score"):
|
||||||
}
|
top_comments.append(
|
||||||
)
|
{
|
||||||
|
"body": comment.body[:1000], # Include more of each comment
|
||||||
|
"score": comment.score,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
candidate_posts.append(
|
candidate_posts.append(
|
||||||
{
|
{
|
||||||
|
|
@ -517,27 +519,11 @@ Extract all stock ticker symbols mentioned in the post or comments."""
|
||||||
|
|
||||||
return post
|
return post
|
||||||
|
|
||||||
# Parallel evaluation with progress tracking
|
# Parallel evaluation
|
||||||
try:
|
logger.info(f"Scanning {len(candidate_posts)} Reddit posts with LLM...")
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
use_tqdm = True
|
|
||||||
except ImportError:
|
|
||||||
use_tqdm = False
|
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||||
futures = [executor.submit(evaluate_post, post) for post in candidate_posts]
|
futures = [executor.submit(evaluate_post, post) for post in candidate_posts]
|
||||||
|
evaluated = [f.result() for f in as_completed(futures)]
|
||||||
if use_tqdm:
|
|
||||||
# With progress bar
|
|
||||||
evaluated = []
|
|
||||||
for future in tqdm(
|
|
||||||
as_completed(futures), total=len(futures), desc=" Evaluating posts"
|
|
||||||
):
|
|
||||||
evaluated.append(future.result())
|
|
||||||
else:
|
|
||||||
# Without progress bar (fallback)
|
|
||||||
evaluated = [f.result() for f in as_completed(futures)]
|
|
||||||
|
|
||||||
# Filter quality threshold (55+ = decent DD)
|
# Filter quality threshold (55+ = decent DD)
|
||||||
quality_dd = [p for p in evaluated if p["quality_score"] >= 55]
|
quality_dd = [p for p in evaluated if p["quality_score"] >= 55]
|
||||||
|
|
@ -559,6 +545,18 @@ Extract all stock ticker symbols mentioned in the post or comments."""
|
||||||
candidate_posts.sort(key=lambda x: x["full_length"] + (x["score"] * 10), reverse=True)
|
candidate_posts.sort(key=lambda x: x["full_length"] + (x["score"] * 10), reverse=True)
|
||||||
top_dd = candidate_posts[:top_n]
|
top_dd = candidate_posts[:top_n]
|
||||||
|
|
||||||
|
if as_list:
|
||||||
|
if not llm_evaluator:
|
||||||
|
import re
|
||||||
|
|
||||||
|
ticker_pattern = r"\$([A-Z]{2,5})\b|^([A-Z]{2,5})\s"
|
||||||
|
for post in top_dd:
|
||||||
|
matches = re.findall(ticker_pattern, post["title"] + " " + post["text"])
|
||||||
|
tickers = list(set([t[0] or t[1] for t in matches if t[0] or t[1]]))
|
||||||
|
post["ticker"] = tickers[0] if tickers else ""
|
||||||
|
post["quality_score"] = 75 # default to Medium priority
|
||||||
|
return top_dd
|
||||||
|
|
||||||
if not top_dd:
|
if not top_dd:
|
||||||
return f"# Undiscovered DD\n\nNo high-quality DD found (scanned {len(candidate_posts)} posts)."
|
return f"# Undiscovered DD\n\nNo high-quality DD found (scanned {len(candidate_posts)} posts)."
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue