483 lines
19 KiB
Python
483 lines
19 KiB
Python
"""
Finviz + Yahoo Finance hybrid short-interest discovery, plus an OpenInsider insider-transaction screener.

Short interest: Finviz is used to discover tickers with high short interest, then Yahoo Finance supplies the exact data.
"""
|
|
|
|
import re
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from typing import Annotated
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
from tradingagents.dataflows.y_finance import get_ticker_info
|
|
from tradingagents.utils.logger import get_logger
|
|
|
|
# Module-level logger, created by the project's get_logger helper and named after this module.
logger = get_logger(__name__)
|
|
|
|
|
|
def get_short_interest(
    min_short_interest_pct: Annotated[float, "Minimum short interest % of float"] = 10.0,
    min_days_to_cover: Annotated[float, "Minimum days to cover ratio"] = 2.0,
    top_n: Annotated[int, "Number of top results to return"] = 20,
    return_structured: Annotated[bool, "Return dict with raw data instead of markdown"] = False,
):
    """
    Discover stocks with high short interest using Finviz + Yahoo Finance.

    Strategy: the Finviz screener is scraped to discover candidate tickers,
    then Yahoo Finance (through ``get_ticker_info``) supplies the short
    interest and days-to-cover figures that both thresholds are checked
    against. This is a discovery tool: it does not work from a predefined
    watchlist.

    Args:
        min_short_interest_pct: Minimum short interest as % of float.
        min_days_to_cover: Minimum days-to-cover (short ratio). Tickers whose
            ratio is missing are treated as 0.0 and therefore dropped whenever
            this threshold is positive.
        top_n: Number of top results to return.
        return_structured: If True, return a list of dicts instead of markdown.

    Returns:
        If return_structured=True: list of candidate dicts with the keys
        ``ticker``, ``price``, ``market_cap``, ``volume``,
        ``short_interest_pct``, ``days_to_cover`` and ``signal`` (an empty
        list on any failure).
        If return_structured=False: a formatted markdown report, or a short
        message describing why nothing could be reported.

    This function does not raise: scraping and processing errors are logged
    and reported through the return value.
    """
    try:
        # Step 1: use the Finviz screener to DISCOVER tickers with high short interest.
        logger.info(
            f"Discovering tickers with short interest >{min_short_interest_pct}% from Finviz..."
        )
        discovered_tickers = _scrape_finviz_short_tickers(min_short_interest_pct)

        if not discovered_tickers:
            if return_structured:
                return []
            return f"No stocks discovered with short interest >{min_short_interest_pct}% on Finviz."

        logger.info(f"Discovered {len(discovered_tickers)} tickers from Finviz")
        logger.info("Fetching detailed short interest data from Yahoo Finance...")

        # Step 2: verify every discovered ticker against Yahoo Finance, in parallel.
        all_candidates = []
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(
                    _fetch_short_candidate, ticker, min_short_interest_pct, min_days_to_cover
                )
                for ticker in discovered_tickers
            ]
            for future in as_completed(futures):
                candidate = future.result()
                if candidate:
                    all_candidates.append(candidate)

        if not all_candidates:
            if return_structured:
                return []
            return f"No stocks with verified short interest >{min_short_interest_pct}% (Finviz found {len(discovered_tickers)} tickers but Yahoo Finance data didn't confirm)."

        # Highest short interest first. The ticker tie-break keeps the output
        # deterministic; futures complete in arbitrary order.
        sorted_candidates = sorted(
            all_candidates, key=lambda c: (-c["short_interest_pct"], c["ticker"])
        )[:top_n]

        if return_structured:
            return sorted_candidates

        parts = [
            "# Discovered High Short Interest Stocks\n\n",
            f"**Criteria**: Short Interest >{min_short_interest_pct}%, Days to Cover >={min_days_to_cover}\n",
            "**Data Source**: Finviz Screener (discovery) + Yahoo Finance (verification)\n",
            f"**Total Discovered**: {len(all_candidates)} stocks\n\n",
            f"**Top {len(sorted_candidates)} Candidates**:\n\n",
            "| Ticker | Price | Market Cap | Volume | Short % | Signal |\n",
            "|--------|-------|------------|--------|---------|--------|\n",
        ]
        for candidate in sorted_candidates:
            parts.append(
                f"| {candidate['ticker']} | "
                f"${candidate['price']:.2f} | "
                f"{format_market_cap(candidate['market_cap'])} | "
                f"{candidate['volume']:,} | "
                f"{candidate['short_interest_pct']:.1f}% | "
                f"{candidate['signal']} |\n"
            )
        parts.extend(
            [
                "\n\n## Signal Definitions\n\n",
                "- **extreme_squeeze_risk**: Short interest >30% - Very high squeeze potential\n",
                "- **high_squeeze_potential**: Short interest 20-30% - High squeeze risk\n",
                "- **moderate_squeeze_potential**: Short interest 15-20% - Moderate squeeze risk\n",
                "- **low_squeeze_potential**: Short interest 10-15% - Lower squeeze risk\n\n",
                "**Note**: High short interest alone doesn't guarantee a squeeze. Look for positive catalysts.\n",
            ]
        )
        return "".join(parts)

    except requests.exceptions.RequestException as e:
        logger.error(f"Error scraping Finviz: {e}")
        if return_structured:
            return []
        return f"Error scraping Finviz: {e}"
    except Exception as e:
        logger.error(f"Unexpected error discovering short interest stocks: {e}")
        if return_structured:
            return []
        return f"Unexpected error discovering short interest stocks: {e}"


def _scrape_finviz_short_tickers(min_short_interest_pct, pages=3):
    """Scrape the first ``pages`` Finviz screener pages and return unique tickers in page order."""
    # Finviz only offers fixed short-float buckets; pick the tightest one that
    # does not exceed the requested minimum.
    short_filter = "sh_short_o5"
    for threshold, code in ((20, "sh_short_o20"), (15, "sh_short_o15"), (10, "sh_short_o10")):
        if min_short_interest_pct >= threshold:
            short_filter = code
            break

    # v=152 is the simple screener view.
    base_url = f"https://finviz.com/screener.ashx?v=152&f={short_filter}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html",
    }
    quote_link = re.compile(r"quote\.ashx\?t=")
    ticker_shape = re.compile(r"^[A-Z]{1,5}$")

    seen = set()
    ordered = []
    for page_num in range(1, pages + 1):
        # Pages hold 20 rows; later pages are addressed by 1-based row offset.
        url = base_url if page_num == 1 else f"{base_url}&r={(page_num - 1) * 20 + 1}"
        try:
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as exc:
            if not ordered:
                raise
            # Keep what earlier pages produced rather than discarding it.
            logger.warning(f"Finviz page {page_num} failed ({exc}); using tickers found so far")
            break

        soup = BeautifulSoup(response.text, "html.parser")
        for link in soup.find_all("a", href=quote_link):
            ticker = link.get_text(strip=True)
            if ticker_shape.match(ticker) and ticker not in seen:
                seen.add(ticker)
                ordered.append(ticker)
    return ordered


def _numeric_field(info, *keys, default=0):
    """Return the first value among ``keys`` in ``info`` that is a real number, else ``default``."""
    for key in keys:
        value = info.get(key)
        # A key can be present with a None value, so a .get() default alone is not enough.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return value
    return default


def _squeeze_signal(short_pct):
    """Map a short-interest percentage to its squeeze-potential label."""
    if short_pct >= 30:
        return "extreme_squeeze_risk"
    if short_pct >= 20:
        return "high_squeeze_potential"
    if short_pct >= 15:
        return "moderate_squeeze_potential"
    return "low_squeeze_potential"


def _fetch_short_candidate(ticker, min_short_interest_pct, min_days_to_cover):
    """Return a candidate dict for ``ticker`` if its Yahoo data meets both thresholds, else None."""
    try:
        info = get_ticker_info(ticker)

        short_fraction = _numeric_field(info, "shortPercentOfFloat", "sharesPercentSharesOut")
        if not short_fraction:
            return None
        short_pct = short_fraction * 100  # Convert to percentage

        # Re-check the threshold: the Finviz bucket is coarser than the request.
        if not (short_pct >= min_short_interest_pct):
            return None

        # Days to cover (short ratio): shares short / average daily volume.
        days_to_cover = _numeric_field(info, "shortRatio", default=0.0)
        if days_to_cover < min_days_to_cover:
            return None

        return {
            "ticker": ticker,
            "price": _numeric_field(info, "currentPrice", "regularMarketPrice"),
            "market_cap": _numeric_field(info, "marketCap"),
            "volume": _numeric_field(info, "volume", "regularMarketVolume"),
            "short_interest_pct": short_pct,
            "days_to_cover": days_to_cover,
            "signal": _squeeze_signal(short_pct),
        }
    except Exception as exc:
        # Best effort per ticker: one bad lookup must not abort the whole scan.
        logger.debug(f"Skipping {ticker}: could not fetch short interest data ({exc})")
        return None
|
|
|
|
|
|
def parse_market_cap(market_cap_text: str) -> float:
    """Parse a market cap from Finviz format (e.g., '1.23B', '456M') into a plain number.

    A leading ``$``, thousands separators, surrounding whitespace and
    lower-case suffixes are tolerated. Recognised suffixes are ``K``, ``M``,
    ``B`` and ``T``; anything after the number and optional suffix is ignored.

    Args:
        market_cap_text: Text such as ``"1.23B"``, ``"$456M"`` or ``"-"``.

    Returns:
        The value as a float, or 0.0 when the text is empty, is the ``"-"``
        placeholder, or does not start with a number.
    """
    if not market_cap_text or market_cap_text == "-":
        return 0.0

    cleaned = market_cap_text.strip().upper().replace(",", "").lstrip("$").lstrip()

    # Require at least one digit so that a lone "." cannot reach float().
    match = re.match(r"(\d+(?:\.\d*)?|\.\d+)\s*([TBMK])?", cleaned)
    if not match:
        return 0.0

    number = float(match.group(1))
    multipliers = {
        "T": 1_000_000_000_000,
        "B": 1_000_000_000,
        "M": 1_000_000,
        "K": 1_000,
    }
    suffix = match.group(2)
    if suffix is None:
        return number
    return number * multipliers[suffix]
|
|
|
|
|
|
def format_market_cap(market_cap: float) -> str:
    """Render a dollar market cap compactly.

    Values of at least one billion are shown as ``$x.xxB``, values of at
    least one million as ``$x.xxM``, and anything smaller as a whole-dollar
    amount with thousands separators.
    """
    # Largest unit first, so the first threshold reached decides the suffix.
    units = ((1_000_000_000, "B"), (1_000_000, "M"))
    for threshold, suffix in units:
        if market_cap >= threshold:
            return f"${market_cap / threshold:.2f}{suffix}"
    return f"${market_cap:,.0f}"
|
|
|
|
|
|
def get_finviz_short_interest(
    min_short_interest_pct: float = 10.0,
    min_days_to_cover: float = 2.0,
    top_n: int = 20,
) -> str:
    """Alias for get_short_interest to match registry naming convention.

    Forwards the three thresholds unchanged and leaves ``return_structured``
    at its default of False, so the result is the markdown report (or the
    message string get_short_interest returns when there is nothing to report).
    """
    return get_short_interest(
        min_short_interest_pct=min_short_interest_pct,
        min_days_to_cover=min_days_to_cover,
        top_n=top_n,
    )
|
|
|
|
|
|
def get_insider_buying_screener(
    transaction_type: Annotated[str, "Transaction type: 'buy', 'sell', or 'any'"] = "buy",
    lookback_days: Annotated[int, "Days to look back for transactions"] = 7,
    min_value: Annotated[int, "Minimum transaction value in dollars"] = 25000,
    top_n: Annotated[int, "Number of top results to return"] = 20,
    return_structured: Annotated[bool, "Return list of dicts instead of markdown"] = False,
    deduplicate: Annotated[bool, "If False, return all transactions without deduplication"] = True,
):
    """
    Discover stocks with recent insider buying/selling using OpenInsider.

    LEADING INDICATOR: Insiders buying their own stock before price moves.
    Results are sorted by transaction size (largest absolute value first).

    Args:
        transaction_type: "buy" for purchases, "any" for purchases and sales;
            every other value is treated as "sell".
        lookback_days: Days to look back (default 7).
        min_value: Minimum transaction value in dollars; sent to the screener
            in whole thousands (``min_value // 1000``).
        top_n: Maximum number of tickers in the deduplicated result.
        return_structured: If True, returns list of dicts instead of markdown.
        deduplicate: If False and return_structured=True, every parsed
            transaction is returned (no per-ticker dedup, ``top_n`` ignored).

    Returns:
        If return_structured=True: list of transaction dicts with the keys
        ``ticker``, ``company``, ``insider``, ``title``, ``trade_type``,
        ``price``, ``qty``, ``value_str`` and ``value_num`` (an empty list
        when nothing is found or on any failure).
        If return_structured=False: a formatted markdown report, or a short
        message describing why nothing could be reported.

    This function does not raise: network and processing errors are logged
    and reported through the return value.
    """
    if transaction_type == "buy":
        filter_desc, report_label = "insider buying", "Buying"
    elif transaction_type == "any":
        filter_desc, report_label = "insider transactions", "Transactions"
    else:
        filter_desc, report_label = "insider selling", "Selling"

    try:
        logger.info(f"Discovering tickers with {filter_desc} from OpenInsider...")

        url = _openinsider_url(transaction_type, lookback_days, min_value)
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Accept": "text/html",
        }

        response = requests.get(url, headers=headers, timeout=60)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Find the main data table and its body.
        table = soup.find("table", class_="tinytable")
        tbody = table.find("tbody") if table else None
        if not tbody:
            # Structured callers always get a list, never a message string.
            if return_structured:
                return []
            return f"No {filter_desc} data found on OpenInsider."

        transactions = []
        for row in tbody.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < 12:
                continue
            try:
                parsed = _parse_insider_row(cells, transaction_type)
            except Exception as exc:
                # Best effort per row: a malformed row must not abort the scan.
                logger.debug(f"Skipping unparseable OpenInsider row: {exc}")
                continue
            if parsed is not None:
                transactions.append(parsed)

        if not transactions:
            if return_structured:
                return []
            return f"No {filter_desc} transactions found in the last {lookback_days} days."

        # Largest first by magnitude. NOTE(review): assumes sale values are
        # shown with a minus sign, in which case a signed sort would rank the
        # biggest sales last - confirm against live OpenInsider output.
        transactions.sort(key=lambda t: abs(t["value_num"]), reverse=True)

        # Return all transactions without deduplication if requested.
        if return_structured and not deduplicate:
            logger.info(f"Returning all {len(transactions)} {filter_desc} transactions (no dedup)")
            return transactions

        # Deduplicate by ticker, keeping the largest transaction per ticker.
        seen_tickers = set()
        unique_transactions = []
        for t in transactions:
            # Check the cap before appending so that top_n <= 0 yields nothing.
            if len(unique_transactions) >= top_n:
                break
            if t["ticker"] in seen_tickers:
                continue
            seen_tickers.add(t["ticker"])
            unique_transactions.append(t)

        logger.info(
            f"Discovered {len(unique_transactions)} tickers with {filter_desc} (sorted by value)"
        )

        if return_structured:
            return unique_transactions

        report_lines = [
            f"# Insider {report_label} Report",
            f"*Top {len(unique_transactions)} stocks by transaction value (last {lookback_days} days)*\n",
            "| Ticker | Company | Insider | Title | Value | Price |",
            "|--------|---------|---------|-------|-------|-------|",
        ]
        report_lines.extend(
            f"| {t['ticker']} | {t['company']} | {t['insider']} | {t['title']} | {t['value_str']} | {t['price']} |"
            for t in unique_transactions
        )
        report_lines.append(
            f"\n**Total: {len(unique_transactions)} stocks with significant {filter_desc}**"
        )
        report_lines.append("*Sorted by transaction value (largest first)*")

        return "\n".join(report_lines)

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching insider data from OpenInsider: {e}")
        if return_structured:
            return []
        return f"Error fetching insider data from OpenInsider: {e}"
    except Exception as e:
        logger.error(f"Error processing insider screener: {e}")
        if return_structured:
            return []
        return f"Error processing insider screener: {e}"


def _openinsider_url(transaction_type, lookback_days, min_value):
    """Build the OpenInsider screener URL for the requested transaction type."""
    # fd = filing-date window in days, vl = minimum value in thousands of dollars.
    # The buy and sell queries differ in xp=1 vs xs=1, so those flags select
    # purchases and sales respectively.
    # NOTE(review): sending both flags for "any" is inferred from that - confirm.
    common = (
        "http://openinsider.com/screener?s=&o=&pl=&ph=&ll=&lh="
        f"&fd={lookback_days}&fdr=&td=0&tdr=&fdlyl=&fdlyh=&dtefrom=&dteto="
    )
    value_part = f"&vl={min_value // 1000}&vh=&ocl=&och="
    buy_tail = "&session=all&cnt=100&page=1"
    sell_tail = (
        "&sic1=-1&sicl=100&sich=9999&grp=0&nfl=&nfh=&nil=&nih=&nol=&noh="
        "&v2l=&v2h=&oc2l=&oc2h=&sortcol=4&cnt=100&page=1"
    )
    if transaction_type == "buy":
        return f"{common}&xp=1{value_part}{buy_tail}"
    if transaction_type == "any":
        return f"{common}&xp=1&xs=1{value_part}{sell_tail}"
    return f"{common}&xs=1{value_part}{sell_tail}"


def _parse_insider_value(value_str):
    """Convert a value cell such as '+$1,234,567' or '$2.5M' to a number (0 if unparseable)."""
    cleaned = value_str.replace("$", "").replace("+", "").replace(",", "").strip()
    if not cleaned:
        return 0
    try:
        if "M" in cleaned:
            return float(cleaned.replace("M", "")) * 1_000_000
        if "K" in cleaned:
            return float(cleaned.replace("K", "")) * 1_000
        return float(cleaned)
    except ValueError:
        return 0


def _parse_insider_row(cells, transaction_type):
    """Turn one OpenInsider row (at least 12 cells) into a transaction dict, or None to skip it."""
    # OpenInsider columns:
    # 0: X (checkbox), 1: Filing Date, 2: Trade Date, 3: Ticker, 4: Company Name
    # 5: Insider Name, 6: Title, 7: Trade Type, 8: Price, 9: Qty, 10: Owned, 11: dOwn, 12: Value
    ticker_link = cells[3].find("a")
    ticker = ticker_link.get_text(strip=True) if ticker_link else ""
    if not re.match(r"^[A-Z]{1,5}$", ticker):
        return None

    trade_type = cells[7].get_text(strip=True)
    trade_type_lower = trade_type.lower()
    if transaction_type == "buy" and not (
        "buy" in trade_type_lower or "p -" in trade_type_lower
    ):
        return None
    if transaction_type == "sell" and not (
        "sale" in trade_type_lower or "s -" in trade_type_lower
    ):
        return None

    title_raw = cells[6].get_text(strip=True)
    # The caller only guarantees 12 cells, so the 13th (Value) may be absent.
    value_str = cells[12].get_text(strip=True) if len(cells) > 12 else ""

    return {
        "ticker": ticker,
        "company": cells[4].get_text(strip=True)[:40],
        "insider": cells[5].get_text(strip=True)[:25],
        # "10%" means 10% beneficial owner - clarify for readability.
        "title": "10% Owner" if title_raw == "10%" else title_raw[:20],
        "trade_type": trade_type,
        "price": cells[8].get_text(strip=True),
        "qty": cells[9].get_text(strip=True),
        "value_str": value_str,
        "value_num": _parse_insider_value(value_str),
    }
|
|
|
|
|
|
def get_finviz_insider_buying(
    transaction_type: str = "buy",
    lookback_days: int = 7,
    min_value: int = 25000,
    top_n: int = 20,
    return_structured: bool = False,
    deduplicate: bool = True,
):
    """Alias for get_insider_buying_screener to match registry naming convention.

    Every argument is forwarded unchanged and the screener's result is
    returned as-is.

    Args:
        transaction_type: "buy" for purchases, "sell" for sales
        lookback_days: Days to look back (default 7)
        min_value: Minimum transaction value in dollars
        top_n: Number of top results to return
        return_structured: If True, returns list of dicts instead of markdown
        deduplicate: If False and return_structured=True, returns all transactions
            (not deduplicated by ticker). Useful for cluster detection.
    """
    forwarded = {
        "transaction_type": transaction_type,
        "lookback_days": lookback_days,
        "min_value": min_value,
        "top_n": top_n,
        "return_structured": return_structured,
        "deduplicate": deduplicate,
    }
    return get_insider_buying_screener(**forwarded)
|