"""
Finviz + Yahoo Finance Hybrid - Short Interest Discovery
Uses Finviz to discover tickers with high short interest, then Yahoo Finance for exact data

Also provides an insider-transaction screener backed by OpenInsider.
"""

import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Annotated, Union

import requests
from bs4 import BeautifulSoup

from tradingagents.dataflows.y_finance import get_ticker_info
from tradingagents.utils.logger import get_logger

logger = get_logger(__name__)

# Browser-like headers sent with every scrape request.
_SCRAPE_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Accept": "text/html",
}

# A scraped symbol is accepted only if it is 1-5 uppercase letters.
_TICKER_RE = re.compile(r"^[A-Z]{1,5}$")
# Anchors on the Finviz screener page that point at a quote page.
_FINVIZ_QUOTE_LINK_RE = re.compile(r"quote\.ashx\?t=")
# The screener is paged via an "r=<offset>" parameter in steps of 20 rows.
_FINVIZ_PAGE_SIZE = 20
_FINVIZ_MAX_PAGES = 3

_MARKET_CAP_RE = re.compile(r"^\$?\s*([0-9]+(?:\.[0-9]*)?|\.[0-9]+)\s*([TBMK])?$")
_MARKET_CAP_MULTIPLIERS = {
    "T": 1_000_000_000_000,
    "B": 1_000_000_000,
    "M": 1_000_000,
    "K": 1_000,
}

# The OpenInsider row layout used below reads the Value column at index 12,
# so a usable row needs at least 13 cells.
_OPENINSIDER_MIN_CELLS = 13
# mode -> (description used in log/status messages, label used in the report title)
_INSIDER_MODE_LABELS = {
    "buy": ("insider buying", "Buying"),
    "sell": ("insider selling", "Selling"),
    "any": ("insider buying/selling", "Buying/Selling"),
}


def _is_number(value) -> bool:
    """Return True for int/float values (bools are deliberately excluded)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _first_number(info, *keys, default=0):
    """Return the first numeric ``info[key]`` among *keys*, else *default*.

    Unlike ``info.get(a, info.get(b, 0))`` this also falls back when a key is
    present but holds ``None`` or another non-numeric value.
    """
    for key in keys:
        value = info.get(key)
        if _is_number(value):
            return value
    return default


def _finviz_short_filter(min_short_interest_pct):
    """Map a minimum short-interest percentage onto a Finviz screener filter code."""
    for threshold in (20, 15, 10):
        if min_short_interest_pct >= threshold:
            return f"sh_short_o{threshold}"
    # Anything below 10% uses the 5% filter, so thresholds under 5% still only
    # discover tickers that pass this filter.
    return "sh_short_o5"


def _discover_finviz_tickers(short_filter, max_pages=_FINVIZ_MAX_PAGES):
    """Scrape up to *max_pages* Finviz screener pages and return unique tickers in page order.

    A request failure on the first page propagates (nothing was discovered).
    A failure on a later page keeps what earlier pages already yielded.
    """
    # Build Finviz URL (v=152 is simple view)
    base_url = f"https://finviz.com/screener.ashx?v=152&f={short_filter}"
    discovered = {}  # dict used as an insertion-ordered set

    for page_num in range(1, max_pages + 1):
        if page_num == 1:
            url = base_url
        else:
            offset = (page_num - 1) * _FINVIZ_PAGE_SIZE + 1
            url = f"{base_url}&r={offset}"

        try:
            response = requests.get(url, headers=_SCRAPE_HEADERS, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as exc:
            if not discovered:
                raise
            logger.warning(
                f"Finviz page {page_num} failed ({exc}); "
                f"continuing with {len(discovered)} tickers already discovered"
            )
            break

        soup = BeautifulSoup(response.text, "html.parser")
        count_before = len(discovered)
        for link in soup.find_all("a", href=_FINVIZ_QUOTE_LINK_RE):
            ticker = link.get_text(strip=True)
            if _TICKER_RE.match(ticker):
                discovered.setdefault(ticker, None)

        # A page that adds nothing new means further pages are not worth requesting.
        if len(discovered) == count_before:
            break

    return list(discovered)


def _classify_squeeze_signal(short_pct):
    """Bucket a short-interest percentage into a squeeze signal label."""
    if short_pct >= 30:
        return "extreme_squeeze_risk"
    if short_pct >= 20:
        return "high_squeeze_potential"
    if short_pct >= 15:
        return "moderate_squeeze_potential"
    return "low_squeeze_potential"


def _fetch_short_data(ticker, min_short_interest_pct, min_days_to_cover):
    """Look up one ticker via ``get_ticker_info`` and return a candidate dict.

    Returns None when the data is missing, the lookup fails, or the ticker does
    not meet the short-interest / days-to-cover thresholds.
    """
    try:
        info = get_ticker_info(ticker)

        # The value is treated as a fraction and scaled by 100 to a percentage.
        short_fraction = _first_number(info, "shortPercentOfFloat", "sharesPercentSharesOut")
        if not short_fraction:
            return None
        short_pct = short_fraction * 100

        # Verify it meets criteria (Finviz filter might be outdated).
        # Written as "not >=" so that NaN is rejected as well.
        if not short_pct >= min_short_interest_pct:
            return None

        # "shortRatio" is used as days to cover; missing/non-numeric counts as 0.
        days_to_cover = _first_number(info, "shortRatio", default=0.0)
        if days_to_cover < min_days_to_cover:
            return None

        return {
            "ticker": ticker,
            "price": _first_number(info, "currentPrice", "regularMarketPrice"),
            "market_cap": _first_number(info, "marketCap"),
            "volume": _first_number(info, "volume", "regularMarketVolume"),
            "short_interest_pct": short_pct,
            "days_to_cover": days_to_cover,
            "signal": _classify_squeeze_signal(short_pct),
        }
    except Exception as exc:
        # Best effort per ticker: one bad lookup must not abort the whole scan.
        logger.debug(f"Skipping {ticker}: could not fetch short interest data ({exc})")
        return None


def _format_short_interest_report(
    candidates, total_found, min_short_interest_pct, min_days_to_cover
):
    """Render the short-interest candidates as a markdown report."""
    parts = [
        "# Discovered High Short Interest Stocks\n\n",
        f"**Criteria**: Short Interest >{min_short_interest_pct}%, "
        f"Days to Cover >={min_days_to_cover}\n",
        "**Data Source**: Finviz Screener (Web Scraping)\n",
        f"**Total Discovered**: {total_found} stocks\n\n",
        f"**Top {len(candidates)} Candidates**:\n\n",
        "| Ticker | Price | Market Cap | Volume | Short % | Signal |\n",
        "|--------|-------|------------|--------|---------|--------|\n",
    ]

    for candidate in candidates:
        market_cap_str = format_market_cap(candidate["market_cap"])
        parts.append(
            f"| {candidate['ticker']} | "
            f"${candidate['price']:.2f} | "
            f"{market_cap_str} | "
            f"{candidate['volume']:,} | "
            f"{candidate['short_interest_pct']:.1f}% | "
            f"{candidate['signal']} |\n"
        )

    parts.extend(
        [
            "\n\n## Signal Definitions\n\n",
            "- **extreme_squeeze_risk**: Short interest >30% - Very high squeeze potential\n",
            "- **high_squeeze_potential**: Short interest 20-30% - High squeeze risk\n",
            "- **moderate_squeeze_potential**: Short interest 15-20% - Moderate squeeze risk\n",
            "- **low_squeeze_potential**: Short interest 10-15% - Lower squeeze risk\n\n",
            "**Note**: High short interest alone doesn't guarantee a squeeze. "
            "Look for positive catalysts.\n",
        ]
    )
    return "".join(parts)


def get_short_interest(
    min_short_interest_pct: Annotated[float, "Minimum short interest % of float"] = 10.0,
    min_days_to_cover: Annotated[float, "Minimum days to cover ratio"] = 2.0,
    top_n: Annotated[int, "Number of top results to return"] = 20,
    return_structured: Annotated[bool, "Return dict with raw data instead of markdown"] = False,
):
    """
    Discover stocks with high short interest using Finviz + Yahoo Finance.

    Strategy: Finviz filters stocks by short interest (discovery), then
    ``get_ticker_info`` supplies the short-interest figures used for the final
    filtering and ranking. Tickers come from the screener rather than from a
    predefined watchlist.

    Args:
        min_short_interest_pct: Minimum short interest as % of float
        min_days_to_cover: Minimum days to cover ratio
        top_n: Number of top results to return (values below 0 are treated as 0)
        return_structured: If True, returns list of dicts instead of markdown

    Returns:
        If return_structured=True: list of candidate dicts with the keys
            ticker, price, market_cap, volume, short_interest_pct,
            days_to_cover and signal, sorted by short interest (highest first).
        If return_structured=False: Formatted markdown report.
        Errors never propagate: they yield ``[]`` (structured) or an error
        message string (markdown).
    """
    try:
        # Step 1: Use Finviz screener to DISCOVER tickers with high short interest
        logger.info(
            f"Discovering tickers with short interest >{min_short_interest_pct}% from Finviz..."
        )
        short_filter = _finviz_short_filter(min_short_interest_pct)
        discovered_tickers = _discover_finviz_tickers(short_filter)

        if not discovered_tickers:
            if return_structured:
                return []
            return f"No stocks discovered with short interest >{min_short_interest_pct}% on Finviz."

        logger.info(f"Discovered {len(discovered_tickers)} tickers from Finviz")
        logger.info("Fetching detailed short interest data from Yahoo Finance...")

        # Step 2: Fetch per-ticker data in parallel (the lookups are I/O bound)
        all_candidates = []
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(
                    _fetch_short_data, ticker, min_short_interest_pct, min_days_to_cover
                )
                for ticker in discovered_tickers
            ]
            for future in as_completed(futures):
                result = future.result()
                if result:
                    all_candidates.append(result)

        if not all_candidates:
            if return_structured:
                return []
            return f"No stocks with verified short interest >{min_short_interest_pct}% (Finviz found {len(discovered_tickers)} tickers but Yahoo Finance data didn't confirm)."

        # Highest short interest first; ticker breaks ties so the order does not
        # depend on which worker thread happened to finish first.
        ranked = sorted(all_candidates, key=lambda c: (-c["short_interest_pct"], c["ticker"]))
        sorted_candidates = ranked[: max(top_n, 0)]

        if return_structured:
            return sorted_candidates

        return _format_short_interest_report(
            sorted_candidates, len(all_candidates), min_short_interest_pct, min_days_to_cover
        )

    except requests.exceptions.RequestException as e:
        if return_structured:
            return []
        return f"Error scraping Finviz: {str(e)}"
    except Exception as e:
        if return_structured:
            return []
        return f"Unexpected error discovering short interest stocks: {str(e)}"


def parse_market_cap(market_cap_text: str) -> float:
    """Parse market cap from Finviz format (e.g., '1.23B', '456M').

    Accepts an optional leading '$', thousands separators, and a
    case-insensitive T/B/M/K suffix. Empty input, '-', or any text that is not
    a well-formed number yields 0.0 instead of raising.
    """
    if not market_cap_text:
        return 0.0

    text = str(market_cap_text).upper().replace(",", "").strip()
    if text == "-":
        return 0.0

    # Anchored at both ends so partially numeric junk is rejected, not half-parsed.
    match = _MARKET_CAP_RE.match(text)
    if not match:
        return 0.0

    number = float(match.group(1))
    return number * _MARKET_CAP_MULTIPLIERS.get(match.group(2), 1)


def format_market_cap(market_cap: float) -> str:
    """Format market cap for display (e.g. '$1.50T', '$2.30B', '$45.00M', '$12,345').

    Returns 'N/A' when the value is missing, non-numeric or NaN.
    """
    if not _is_number(market_cap) or market_cap != market_cap:
        return "N/A"

    if market_cap >= 1_000_000_000_000:
        return f"${market_cap / 1_000_000_000_000:.2f}T"
    if market_cap >= 1_000_000_000:
        return f"${market_cap / 1_000_000_000:.2f}B"
    if market_cap >= 1_000_000:
        return f"${market_cap / 1_000_000:.2f}M"
    return f"${market_cap:,.0f}"


def get_finviz_short_interest(
    min_short_interest_pct: float = 10.0,
    min_days_to_cover: float = 2.0,
    top_n: int = 20,
    return_structured: bool = False,
) -> Union[str, list]:
    """Alias for get_short_interest to match registry naming convention.

    Args:
        min_short_interest_pct: Minimum short interest as % of float
        min_days_to_cover: Minimum days to cover ratio
        top_n: Number of top results to return
        return_structured: If True, returns list of dicts instead of markdown
            (defaults to False, which keeps the markdown string result)
    """
    return get_short_interest(
        min_short_interest_pct, min_days_to_cover, top_n, return_structured
    )


def _build_openinsider_url(mode, lookback_days, min_value):
    """Build the OpenInsider screener URL for *mode* ('buy', 'sell' or 'any')."""
    # fd = filing-date lookback in days, vl = minimum value in thousands of dollars.
    value_thousands = max(int(min_value) // 1000, 0)
    prefix = (
        "http://openinsider.com/screener?s=&o=&pl=&ph=&ll=&lh="
        f"&fd={lookback_days}&fdr=&td=0&tdr=&fdlyl=&fdlyh=&dtefrom=&dteto="
    )
    value_part = f"&vl={value_thousands}&vh=&ocl=&och="
    sell_suffix = (
        "&sic1=-1&sicl=100&sich=9999&grp=0&nfl=&nfh=&nil=&nih=&nol=&noh="
        "&v2l=&v2h=&oc2l=&oc2h=&sortcol=4&cnt=100&page=1"
    )

    if mode == "buy":
        return f"{prefix}&xp=1{value_part}&session=all&cnt=100&page=1"
    if mode == "any":
        # NOTE(review): assumes xp=1 / xs=1 are the purchase / sale selectors and
        # can be combined in one query - confirm against OpenInsider's screener form.
        return f"{prefix}&xp=1&xs=1{value_part}{sell_suffix}"
    return f"{prefix}&xs=1{value_part}{sell_suffix}"


def _parse_insider_value(value_str):
    """Convert a Value cell such as '+$1,234,567' or '-$1.5M' to a number (0 if unparseable)."""
    cleaned = value_str.replace("$", "").replace("+", "").replace(",", "").strip()
    if not cleaned:
        return 0
    try:
        if "M" in cleaned:
            return float(cleaned.replace("M", "")) * 1_000_000
        if "K" in cleaned:
            return float(cleaned.replace("K", "")) * 1_000
        return float(cleaned)
    except ValueError:
        return 0


def _matches_transaction_type(trade_type, mode):
    """Return True when a Trade Type cell belongs to the requested *mode*."""
    lowered = trade_type.lower()
    if mode == "buy":
        return "buy" in lowered or "p -" in lowered
    if mode == "sell":
        return "sale" in lowered or "s -" in lowered
    return True  # "any": keep every row


def _parse_insider_row(cells):
    """Turn one OpenInsider table row into a transaction dict, or None for an invalid ticker.

    Column layout assumed by the indices below:
        0: X (checkbox), 1: Filing Date, 2: Trade Date, 3: Ticker, 4: Company Name,
        5: Insider Name, 6: Title, 7: Trade Type, 8: Price, 9: Qty, 10: Owned,
        11: ΔOwn, 12: Value
    """
    ticker_link = cells[3].find("a")
    ticker = ticker_link.get_text(strip=True) if ticker_link else ""
    if not ticker or not _TICKER_RE.match(ticker):
        return None

    title_raw = cells[6].get_text(strip=True)
    value_str = cells[12].get_text(strip=True)
    return {
        "ticker": ticker,
        "company": cells[4].get_text(strip=True)[:40],
        "insider": cells[5].get_text(strip=True)[:25],
        # "10%" means 10% beneficial owner - clarify for readability
        "title": "10% Owner" if title_raw == "10%" else title_raw[:20],
        "trade_type": cells[7].get_text(strip=True),
        "price": cells[8].get_text(strip=True),
        "qty": cells[9].get_text(strip=True),
        "value_str": value_str,
        "value_num": _parse_insider_value(value_str),
    }


def _md_cell(text):
    """Escape pipe characters so scraped text cannot break a markdown table row."""
    return str(text).replace("|", "\\|")


def get_insider_buying_screener(
    transaction_type: Annotated[str, "Transaction type: 'buy', 'sell', or 'any'"] = "buy",
    lookback_days: Annotated[int, "Days to look back for transactions"] = 7,
    min_value: Annotated[int, "Minimum transaction value in dollars"] = 25000,
    top_n: Annotated[int, "Number of top results to return"] = 20,
    return_structured: Annotated[bool, "Return list of dicts instead of markdown"] = False,
    deduplicate: Annotated[bool, "If False, return all transactions without deduplication"] = True,
):
    """
    Discover stocks with recent insider buying/selling using OpenInsider.

    Results are sorted by absolute transaction value (largest first), so large
    sales - whose values parse as negative numbers - rank ahead of small ones.

    Args:
        transaction_type: "buy" for purchases, "sell" for sales, "any" for both
            (case-insensitive; any other value is treated as "sell")
        lookback_days: Days to look back (default 7)
        min_value: Minimum transaction value in dollars. Sent to OpenInsider in
            whole thousands and re-checked locally against each parsed value.
        top_n: Number of top results to return (deduplicated path only)
        return_structured: If True, returns list of dicts instead of markdown
        deduplicate: If False and return_structured=True, every matching
            transaction is returned (no per-ticker dedup, no top_n cut).

    Returns:
        If return_structured=True: list of transaction dicts with the keys
            ticker, company, insider, title, trade_type, price, qty,
            value_str and value_num.
        If return_structured=False: Formatted markdown report.
        Errors and "nothing found" never propagate: they yield ``[]``
        (structured) or a message string (markdown).
    """
    try:
        mode = str(transaction_type).strip().lower()
        if mode not in ("buy", "any"):
            mode = "sell"
        filter_desc, report_label = _INSIDER_MODE_LABELS[mode]
        logger.info(f"Discovering tickers with {filter_desc} from OpenInsider...")

        url = _build_openinsider_url(mode, lookback_days, min_value)
        response = requests.get(url, headers=_SCRAPE_HEADERS, timeout=60)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Find the main data table
        table = soup.find("table", class_="tinytable")
        tbody = table.find("tbody") if table else None
        if not tbody:
            if return_structured:
                return []
            return f"No {filter_desc} data found on OpenInsider."

        transactions = []
        for row in tbody.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < _OPENINSIDER_MIN_CELLS:
                continue
            try:
                transaction = _parse_insider_row(cells)
            except Exception as exc:
                # Best effort per row: a malformed row must not abort the scan.
                logger.debug(f"Skipping unparseable OpenInsider row ({exc})")
                continue
            if transaction is None:
                continue
            if not _matches_transaction_type(transaction["trade_type"], mode):
                continue
            # Rows whose value could not be parsed (0) are kept rather than guessed at.
            magnitude = abs(transaction["value_num"])
            if magnitude and magnitude < min_value:
                continue
            transactions.append(transaction)

        if not transactions:
            if return_structured:
                return []
            return f"No {filter_desc} transactions found in the last {lookback_days} days."

        # Sort by magnitude (largest first); sale values are negative.
        transactions.sort(key=lambda t: abs(t["value_num"]), reverse=True)

        # Return all transactions without deduplication if requested
        if return_structured and not deduplicate:
            logger.info(f"Returning all {len(transactions)} {filter_desc} transactions (no dedup)")
            return transactions

        # Deduplicate by ticker, keeping the largest transaction per ticker
        limit = max(top_n, 0)
        seen_tickers = set()
        unique_transactions = []
        for t in transactions:
            if len(unique_transactions) >= limit:
                break
            if t["ticker"] in seen_tickers:
                continue
            seen_tickers.add(t["ticker"])
            unique_transactions.append(t)

        logger.info(
            f"Discovered {len(unique_transactions)} tickers with {filter_desc} (sorted by value)"
        )

        # Return structured data if requested
        if return_structured:
            return unique_transactions

        # Format report
        report_lines = [
            f"# Insider {report_label} Report",
            f"*Top {len(unique_transactions)} stocks by transaction value (last {lookback_days} days)*\n",
            "| Ticker | Company | Insider | Title | Value | Price |",
            "|--------|---------|---------|-------|-------|-------|",
        ]
        report_lines.extend(
            f"| {t['ticker']} | {_md_cell(t['company'])} | {_md_cell(t['insider'])} | "
            f"{_md_cell(t['title'])} | {t['value_str']} | {t['price']} |"
            for t in unique_transactions
        )
        report_lines.append(
            f"\n**Total: {len(unique_transactions)} stocks with significant {filter_desc}**"
        )
        report_lines.append("*Sorted by transaction value (largest first)*")

        return "\n".join(report_lines)

    except requests.exceptions.RequestException as e:
        if return_structured:
            return []
        return f"Error fetching insider data from OpenInsider: {e}"
    except Exception as e:
        if return_structured:
            return []
        return f"Error processing insider screener: {e}"


def get_finviz_insider_buying(
    transaction_type: str = "buy",
    lookback_days: int = 7,
    min_value: int = 25000,
    top_n: int = 20,
    return_structured: bool = False,
    deduplicate: bool = True,
):
    """Alias for get_insider_buying_screener to match registry naming convention.

    Args:
        transaction_type: "buy" for purchases, "sell" for sales, "any" for both
        lookback_days: Days to look back (default 7)
        min_value: Minimum transaction value in dollars
        top_n: Number of top results to return
        return_structured: If True, returns list of dicts instead of markdown
        deduplicate: If False and return_structured=True, returns all transactions
            (not deduplicated by ticker). Useful for cluster detection.
    """
    return get_insider_buying_screener(
        transaction_type=transaction_type,
        lookback_days=lookback_days,
        min_value=min_value,
        top_n=top_n,
        return_structured=return_structured,
        deduplicate=deduplicate,
    )