TradingAgents/tradingagents/dataflows/yfinance_scanner.py

426 lines
16 KiB
Python

"""yfinance-based scanner data fetching functions for market-wide analysis."""
import yfinance as yf
import requests
from datetime import datetime
from typing import Annotated
from .finnhub_common import ThirdPartyTimeoutError
def get_market_movers_yfinance(
    category: Annotated[str, "Category: 'day_gainers', 'day_losers', or 'most_actives'"]
) -> str:
    """
    Get market movers using the yfinance predefined screeners.

    Args:
        category: One of 'day_gainers', 'day_losers', or 'most_actives'

    Returns:
        Markdown table (as a string) with up to 15 movers. A plain message
        string is returned instead when the category is invalid, when the
        screener yields no quotes, or when an unexpected error occurs.

    Raises:
        ThirdPartyTimeoutError: If the screener request times out.
    """
    try:
        # Map our category names to yfinance's predefined screener keys
        screener_keys = {
            "day_gainers": "DAY_GAINERS",
            "day_losers": "DAY_LOSERS",
            "most_actives": "MOST_ACTIVES"
        }
        if category not in screener_keys:
            return f"Invalid category '{category}'. Must be one of: {list(screener_keys.keys())}"

        # Use yfinance screener module's screen function
        data = yf.screener.screen(screener_keys[category], count=25)
        if not data or not isinstance(data, dict) or 'quotes' not in data:
            return f"No data found for {category}"
        quotes = data['quotes']
        if not quotes:
            return f"No quotes found for {category}"

        # Format the output
        header = f"# Market Movers: {category.replace('_', ' ').title()}\n"
        header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
        # Rows are collected in a list and joined once at the end
        lines = [
            header,
            "| Symbol | Name | Price | Change % | Volume | Market Cap |",
            "|--------|------|-------|----------|--------|------------|"
        ]
        for quote in quotes[:15]:  # Top 15
            symbol = quote.get('symbol', 'N/A')
            # A key can be present with a null value, so coalesce with `or`
            # instead of relying on dict.get defaults; otherwise a single
            # nameless quote would fail the slice below and abort the table.
            name = quote.get('shortName') or quote.get('longName') or 'N/A'
            price = quote.get('regularMarketPrice', 'N/A')
            change_pct = quote.get('regularMarketChangePercent', 'N/A')
            volume = quote.get('regularMarketVolume', 'N/A')
            market_cap = quote.get('marketCap', 'N/A')

            # Format numbers; non-numeric values are shown as-is
            if isinstance(price, (int, float)):
                price = f"${price:.2f}"
            if isinstance(change_pct, (int, float)):
                change_pct = f"{change_pct:.2f}%"
            if isinstance(volume, (int, float)):
                volume = f"{volume:,.0f}"
            if isinstance(market_cap, (int, float)):
                market_cap = f"${market_cap:,.0f}"

            # str() guards against a non-string name before truncating
            lines.append(
                f"| {symbol} | {str(name)[:30]} | {price} | {change_pct} | {volume} | {market_cap} |"
            )
        return "\n".join(lines) + "\n"
    except requests.exceptions.Timeout as exc:
        raise ThirdPartyTimeoutError("Request timed out fetching market movers") from exc
    except ThirdPartyTimeoutError:
        raise
    except Exception as e:
        return f"Error fetching market movers for {category}: {str(e)}"
def get_market_indices_yfinance() -> str:
    """
    Get major market indices data.

    Downloads the last two daily bars for a fixed set of indices in one
    batched ``yf.download()`` call, then reads the 52-week range (and, when
    only one close is available, the previous close) from each ticker's
    ``fast_info``.

    Returns:
        Markdown table (as a string) containing index values and daily
        changes. A failure while processing one index is reported in that
        index's row; any other unexpected failure yields an error message
        string.

    Raises:
        ThirdPartyTimeoutError: If a request timeout escapes the per-index
            handling (e.g. from the batched download).
    """
    def _fmt_level(value) -> str:
        """Format a price level; missing (None) or NaN values become 'N/A'."""
        if value is None:
            return "N/A"
        if isinstance(value, (int, float)):
            # NaN is the only float that compares unequal to itself
            return "N/A" if value != value else f"{value:.2f}"
        return str(value)

    try:
        # Major market indices
        indices = {
            "^GSPC": "S&P 500",
            "^DJI": "Dow Jones",
            "^IXIC": "NASDAQ",
            "^VIX": "VIX (Volatility Index)",
            "^RUT": "Russell 2000"
        }
        header = "# Major Market Indices\n"
        header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
        # Rows are collected in a list and joined once at the end
        lines = [
            header,
            "| Index | Current Price | Change | Change % | 52W High | 52W Low |",
            "|-------|---------------|--------|----------|----------|----------|"
        ]

        # Batch-download the last two daily bars for all symbols in one request
        symbols = list(indices)
        indices_history = yf.download(symbols, period="2d", auto_adjust=True, progress=False, threads=True)

        for symbol, name in indices.items():
            try:
                # Extract this symbol's closes from the batch download
                try:
                    close_data = indices_history["Close"]
                    # A frame with per-ticker columns is indexed by symbol;
                    # a plain series already belongs to a single ticker.
                    if hasattr(close_data, "columns"):
                        close_data = close_data[symbol]
                    closes = close_data.dropna()
                except KeyError:
                    closes = None

                if closes is None or len(closes) == 0:
                    lines.append(f"| {name} | N/A | - | - | - | - |")
                    continue

                # NOTE(review): the previous comment claimed fast_info needs no
                # extra HTTP call — verify against the yfinance version in use.
                fast = yf.Ticker(symbol).fast_info

                current_price = closes.iloc[-1]
                prev_close = closes.iloc[-2] if len(closes) >= 2 else fast.previous_close
                # Treat a missing, zero or NaN previous close as "no change"
                if prev_close is None or prev_close == 0 or prev_close != prev_close:
                    prev_close = current_price
                change = current_price - prev_close
                change_pct = (change / prev_close * 100) if prev_close else 0

                # Format numbers
                current_str = f"{current_price:.2f}"
                change_str = f"{change:+.2f}"
                change_pct_str = f"{change_pct:+.2f}%"
                high_str = _fmt_level(fast.year_high)
                low_str = _fmt_level(fast.year_low)
                lines.append(f"| {name} | {current_str} | {change_str} | {change_pct_str} | {high_str} | {low_str} |")
            except Exception as e:
                # Best effort: one failing index must not hide the others
                lines.append(f"| {name} | Error: {str(e)} | - | - | - | - |")
        return "\n".join(lines) + "\n"
    except requests.exceptions.Timeout as exc:
        raise ThirdPartyTimeoutError("Request timed out fetching market indices") from exc
    except ThirdPartyTimeoutError:
        raise
    except Exception as e:
        return f"Error fetching market indices: {str(e)}"
def get_sector_performance_yfinance() -> str:
    """
    Get sector-level performance overview using SPDR sector ETFs.

    yfinance Sector.overview lacks performance data, so we use
    sector ETFs (XLK, XLV, etc.) with yf.download() to compute
    1-day, 1-week, 1-month, and YTD returns.

    Returns:
        Markdown table (as a string) containing sector performance data.
        A failure while processing one sector is reported in that sector's
        row; any other unexpected failure yields an error message string.

    Raises:
        ThirdPartyTimeoutError: If the batched download request times out.
    """
    # Map GICS sectors to SPDR ETF tickers
    sector_etfs = {
        "Technology": "XLK",
        "Healthcare": "XLV",
        "Financials": "XLF",
        "Energy": "XLE",
        "Consumer Discretionary": "XLY",
        "Consumer Staples": "XLP",
        "Industrials": "XLI",
        "Materials": "XLB",
        "Real Estate": "XLRE",
        "Utilities": "XLU",
        "Communication Services": "XLC",
    }
    try:
        symbols = list(sector_etfs.values())
        # Download one year of data. The YTD figure below is measured from the
        # first close of the current calendar year, which a 6-month window no
        # longer contains from July onward (it would silently report a
        # ~6-month return as "YTD"). One year always reaches back to January.
        hist = yf.download(symbols, period="1y", auto_adjust=True, progress=False, threads=True)

        header = "# Sector Performance Overview\n"
        header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
        # Rows are collected in a list and joined once at the end
        lines = [
            header,
            "| Sector | 1-Day % | 1-Week % | 1-Month % | YTD % |",
            "|--------|---------|----------|-----------|-------|"
        ]

        for sector_name, etf in sector_etfs.items():
            try:
                # Extract close prices for this ETF
                close_data = hist["Close"]
                # A frame with per-ticker columns is indexed by symbol;
                # a plain series already belongs to a single ticker.
                if hasattr(close_data, "columns"):
                    close_data = close_data[etf]
                closes = close_data.dropna()

                if closes.empty or len(closes) < 2:
                    lines.append(f"| {sector_name} | N/A | N/A | N/A | N/A |")
                    continue

                current = closes.iloc[-1]
                prev = closes.iloc[-2]

                # 1-day
                day_pct = (current - prev) / prev * 100 if prev else 0
                # 1-week (~5 trading days)
                week_pct = _safe_pct(closes, 5)
                # 1-month (~21 trading days)
                month_pct = _safe_pct(closes, 21)
                # YTD: first close of current year vs now
                current_year = closes.index[-1].year
                year_closes = closes[closes.index.year == current_year]
                if len(year_closes) > 0 and year_closes.iloc[0] != 0:
                    ytd_pct = (current - year_closes.iloc[0]) / year_closes.iloc[0] * 100
                else:
                    ytd_pct = None

                day_str = f"{day_pct:+.2f}%"
                week_str = f"{week_pct:+.2f}%" if week_pct is not None else "N/A"
                month_str = f"{month_pct:+.2f}%" if month_pct is not None else "N/A"
                ytd_str = f"{ytd_pct:+.2f}%" if ytd_pct is not None else "N/A"
                lines.append(f"| {sector_name} | {day_str} | {week_str} | {month_str} | {ytd_str} |")
            except Exception as e:
                # Best effort: one failing sector must not hide the others
                lines.append(f"| {sector_name} | Error: {str(e)[:30]} | - | - | - |")
        return "\n".join(lines) + "\n"
    except requests.exceptions.Timeout as exc:
        raise ThirdPartyTimeoutError("Request timed out fetching sector performance") from exc
    except ThirdPartyTimeoutError:
        raise
    except Exception as e:
        return f"Error fetching sector performance: {str(e)}"
def _safe_pct(closes, days_back: int) -> float | None:
"""Compute percentage change from days_back trading days ago."""
if len(closes) < days_back + 1:
return None
base = closes.iloc[-(days_back + 1)]
current = closes.iloc[-1]
if base == 0:
return None
return (current - base) / base * 100
def get_industry_performance_yfinance(
    sector_key: Annotated[str, "Sector key (e.g., 'technology', 'healthcare')"]
) -> str:
    """
    Get industry-level drill-down within a sector.

    Returns top companies with metadata (rating, market weight) **plus**
    recent price performance (1-day, 1-week, 1-month returns) obtained
    via a single batched ``yf.download()`` call for the top 10 tickers.

    Args:
        sector_key: Sector identifier (e.g., 'technology', 'healthcare')

    Returns:
        Markdown table (as a string) with one row per top company. Return
        columns show "N/A" when price history could not be obtained. A plain
        message string is returned when the sector has no company data or an
        unexpected error occurs.

    Raises:
        ThirdPartyTimeoutError: If the sector lookup request times out.
    """
    try:
        # Normalize sector key to yfinance format
        sector_key = sector_key.lower().replace(" ", "-")
        sector = yf.Sector(sector_key)
        top_companies = sector.top_companies
        if top_companies is None or top_companies.empty:
            return f"No industry data found for sector '{sector_key}'"

        # Only the first ten companies are priced and displayed
        top_rows = top_companies.head(10)

        # --- Batch-download price history for the top 10 tickers ----------
        tickers = list(top_rows.index)
        price_returns: dict[str, dict[str, float | None]] = {}
        try:
            hist = yf.download(
                tickers, period="1mo", auto_adjust=True, progress=False, threads=True,
            )
            close_data = hist["Close"]
            # The download can return per-ticker columns even when only one
            # ticker was requested, so decide by the shape of the result
            # rather than by the number of tickers asked for.
            per_ticker_columns = hasattr(close_data, "columns")
            for tkr in tickers:
                try:
                    closes = (close_data[tkr] if per_ticker_columns else close_data).dropna()
                    if closes.empty or len(closes) < 2:
                        continue
                    price_returns[tkr] = {
                        "1d": _safe_pct(closes, 1),
                        "1w": _safe_pct(closes, 5),
                        # "1 month" spans the whole downloaded window
                        "1m": _safe_pct(closes, len(closes) - 1),
                    }
                except Exception:
                    # Best effort: this ticker's returns simply stay N/A
                    continue
        except Exception:
            pass  # Fall through — table will show N/A for returns
        # ------------------------------------------------------------------

        header = f"# Industry Performance: {sector_key.replace('-', ' ').title()}\n"
        header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
        # Rows are collected in a list and joined once at the end
        lines = [
            header,
            "| Company | Symbol | Rating | Market Weight | 1-Day % | 1-Week % | 1-Month % |",
            "|---------|--------|--------|---------------|---------|----------|-----------|"
        ]

        # top_companies has ticker as the DataFrame index (index.name == 'symbol')
        # Columns: name, rating, market weight
        # Every top-10 company gets a row; tickers without downloaded prices
        # show N/A in the return columns.
        for symbol, row in top_rows.iterrows():
            name = row.get('name', 'N/A')
            rating = row.get('rating', 'N/A')
            market_weight = row.get('market weight', None)
            name_short = name[:30] if isinstance(name, str) else str(name)
            weight_str = f"{market_weight:.2%}" if isinstance(market_weight, (int, float)) else "N/A"
            ret = price_returns.get(symbol, {})
            day_str = f"{ret['1d']:+.2f}%" if ret.get('1d') is not None else "N/A"
            week_str = f"{ret['1w']:+.2f}%" if ret.get('1w') is not None else "N/A"
            month_str = f"{ret['1m']:+.2f}%" if ret.get('1m') is not None else "N/A"
            lines.append(
                f"| {name_short} | {symbol} | {rating} | {weight_str}"
                f" | {day_str} | {week_str} | {month_str} |"
            )
        return "\n".join(lines) + "\n"
    except requests.exceptions.Timeout as exc:
        raise ThirdPartyTimeoutError("Request timed out fetching industry performance") from exc
    except ThirdPartyTimeoutError:
        raise
    except Exception as e:
        return f"Error fetching industry performance for sector '{sector_key}': {str(e)}"
def get_topic_news_yfinance(
    topic: Annotated[str, "Search topic/query (e.g., 'artificial intelligence', 'semiconductor')"],
    limit: Annotated[int, "Maximum number of articles to return"] = 10
) -> str:
    """
    Search news by arbitrary topic using yfinance Search.

    Args:
        topic: Search query/topic
        limit: Maximum number of articles to return

    Returns:
        Markdown-formatted string with one "###" section per article (title,
        publisher, optional summary and link). A plain message string is
        returned when nothing is found or an unexpected error occurs.

    Raises:
        ThirdPartyTimeoutError: If the search request times out.
    """
    try:
        search = yf.Search(
            query=topic,
            news_count=limit,
            enable_fuzzy_query=True,
        )
        if not search.news:
            return f"No news found for topic '{topic}'"

        header = f"# News for Topic: {topic}\n"
        header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
        # Rows are collected in a list and joined once at the end
        lines = [header.strip(), ""]

        for article in search.news[:limit]:
            # Handle nested content structure. A null or non-dict "content"
            # value is treated like the flat layout instead of raising and
            # discarding every article.
            content = article.get("content")
            if isinstance(content, dict):
                title = content.get("title", "No title")
                summary = content.get("summary", "")
                # "provider" may be present but null; guard it the same way
                # as the URL objects below.
                provider = content.get("provider") or {}
                publisher = provider.get("displayName", "Unknown")
                # Get URL
                url_obj = content.get("canonicalUrl") or content.get("clickThroughUrl") or {}
                link = url_obj.get("url", "")
            else:
                title = article.get("title", "No title")
                summary = article.get("summary", "")
                publisher = article.get("publisher", "Unknown")
                link = article.get("link", "")

            lines.append(f"### {title} (source: {publisher})")
            if summary:
                lines.append(f"{summary}")
            if link:
                lines.append(f"Link: {link}")
            lines.append("")
        return "\n".join(lines) + "\n"
    except requests.exceptions.Timeout as exc:
        raise ThirdPartyTimeoutError(f"Request timed out fetching news for topic '{topic}'") from exc
    except ThirdPartyTimeoutError:
        raise
    except Exception as e:
        return f"Error fetching news for topic '{topic}': {str(e)}"