feat: add live-tested gatekeeper data sources

This commit is contained in:
Ahmet Guzererler 2026-03-28 09:29:28 +01:00
parent 756d8358d7
commit 7aa76d0061
8 changed files with 248 additions and 84 deletions

View File

@ -25,10 +25,16 @@ All storage, event, checkpoint, and phase re-run logic is now documented in ADR
- **PR#108 merged**: Per-tier LLM fallback for 404/policy errors (ADR 017)
- **PR#107 merged**: `save_holding_review` per-ticker fix; RunLogger threading.local → contextvars
- **PR#106 merged**: MongoDB report store, RunLogger observability, reflexion memory
- **codex/global-search-graph-main-squash** (scanner gatekeeper foundation, local):
- Added live-tested `yfinance` gatekeeper universe query for US-listed liquid profitable mid-cap+ names
- Added live-tested Finviz gap-subset path using the bounded gatekeeper-plus-gap filter
- Narrowed Finviz usage to the gap/event layer instead of the full market-universe layer
- Next step is graph wiring so downstream candidate selection is hard-filtered by the gatekeeper universe
# In Progress
- claude/wizardly-poitras PR: storage finalisation + run history UX
- codex/global-search-graph-main-squash: wire gatekeeper universe into scanner graph and deterministic ranking
# Active Blockers

View File

@ -1,75 +0,0 @@
"""Live integration test for the real gap-detection data path.
This test intentionally exercises the raw yfinance path with no mocks before
the scanner tool is relied upon by the agent layer.
"""
import pytest
pytestmark = pytest.mark.integration
@pytest.mark.integration
def test_yfinance_gap_detection_data_path():
    """Compute gap percentages from live yfinance data with no mocks.

    Screens MOST_ACTIVES, keeps the first five distinct symbols, downloads
    five days of daily OHLC bars and derives (open - previous close) /
    previous close for each symbol that has enough history.
    """
    import yfinance as yf

    screen = yf.screen("MOST_ACTIVES", count=10)
    assert isinstance(screen, dict)

    quotes = screen.get("quotes", [])
    assert quotes, "MOST_ACTIVES returned no quotes"

    # dict.fromkeys de-duplicates while keeping first-seen order.
    distinct = dict.fromkeys(quote.get("symbol") for quote in quotes)
    symbols = [ticker for ticker in distinct if ticker][:5]
    assert symbols, "No symbols extracted from screen results"

    hist = yf.download(
        symbols,
        period="5d",
        interval="1d",
        auto_adjust=False,
        progress=False,
        threads=True,
    )
    assert not hist.empty, "download returned no OHLC data"

    gap_rows = []
    for ticker in symbols:
        try:
            open_series = hist["Open"][ticker].dropna()
            close_series = hist["Close"][ticker].dropna()
        except KeyError:
            # Symbol missing from the downloaded frame; nothing to compute.
            continue

        # Need today's open and the previous session's close.
        has_enough_history = len(open_series) >= 1 and len(close_series) >= 2
        if not has_enough_history:
            continue

        latest_open = float(open_series.iloc[-1])
        previous_close = float(close_series.iloc[-2])
        if previous_close == 0:
            continue

        gap_rows.append((ticker, (latest_open - previous_close) / previous_close * 100))

    assert gap_rows, "Could not compute any real gap percentages from live OHLC data"
    for ticker, gap_pct in gap_rows:
        assert isinstance(ticker, str) and isinstance(gap_pct, float)
@pytest.mark.integration
def test_gap_candidates_tool_live():
    """Invoke the gap-candidates tool live and check it returns a known message shape."""
    from tradingagents.agents.utils.scanner_tools import get_gap_candidates

    result = get_gap_candidates.invoke({})
    assert isinstance(result, str)

    # Either a populated table header or one of the two "no match" messages.
    accepted_markers = (
        "# Gap Candidates",
        "No stocks matched the live gap criteria today.",
        "No stocks matched the live gap universe today.",
    )
    assert any(marker in result for marker in accepted_markers)

View File

@ -0,0 +1,85 @@
"""Live integration tests for the gatekeeper universe and Finviz gap subset.
These tests intentionally hit real yfinance and finvizfinance paths with no
mocks so the scanner foundation is validated before more graph changes land.
"""
import pytest
pytestmark = [pytest.mark.integration, pytest.mark.enable_socket()]
def test_yfinance_gatekeeper_query_data_path():
    """Run the gatekeeper EquityQuery against live yfinance and verify each quote.

    Every returned quote must be on one of the three US exchange codes and
    satisfy the price, 3-month average volume and market-cap thresholds that
    the query itself requested.
    """
    import yfinance as yf
    from yfinance import EquityQuery

    us_exchanges = {"NMS", "NYQ", "ASE"}

    conditions = [
        EquityQuery("is-in", ["exchange", "NMS", "NYQ", "ASE"]),
        EquityQuery("gte", ["intradaymarketcap", 2_000_000_000]),
        EquityQuery("gt", ["netincomemargin.lasttwelvemonths", 0]),
        EquityQuery("gt", ["avgdailyvol3m", 2_000_000]),
        EquityQuery("gt", ["intradayprice", 5]),
    ]
    query = EquityQuery("and", conditions)

    result = yf.screen(query, size=10, sortField="dayvolume", sortAsc=False)
    assert isinstance(result, dict)

    quotes = result.get("quotes", [])
    assert quotes, "Gatekeeper yfinance query returned no quotes"

    def _numeric(quote, field):
        # Missing or None fields count as 0 so the threshold assertion fails.
        return float(quote.get(field) or 0)

    for quote in quotes:
        assert quote.get("exchange") in us_exchanges
        assert _numeric(quote, "regularMarketPrice") > 5
        assert _numeric(quote, "averageDailyVolume3Month") > 2_000_000
        assert _numeric(quote, "marketCap") >= 2_000_000_000
def test_gatekeeper_universe_tool_live():
    """Invoke the gatekeeper-universe tool live and check its output shape."""
    from tradingagents.agents.utils.scanner_tools import get_gatekeeper_universe

    result = get_gatekeeper_universe.invoke({})
    assert isinstance(result, str)

    # Accept either the table header or the exact "no match" message.
    no_match_message = "No stocks matched the gatekeeper universe today."
    has_table_header = result.startswith("# Gatekeeper Universe")
    assert has_table_header or result == no_match_message
def test_finviz_gatekeeper_gap_filter_data_path():
    """Apply the gatekeeper-plus-gap filter to live Finviz and check the frame.

    Skips (rather than fails) when Finviz returns no page or no matching rows,
    since an empty gap list is a legitimate market outcome.
    """
    from finvizfinance.screener.overview import Overview

    gatekeeper_gap_filters = {
        "Market Cap.": "+Mid (over $2bln)",
        "Net Profit Margin": "Positive (>0%)",
        "Average Volume": "Over 2M",
        "Price": "Over $5",
        "Gap": "Up 5%",
    }

    screener = Overview()
    screener.set_filter(filters_dict=gatekeeper_gap_filters)
    frame = screener.screener_view(limit=10, verbose=0)

    if frame is None:
        pytest.skip("Finviz returned no page for the gatekeeper gap filter today")

    assert hasattr(frame, "empty")
    if frame.empty:
        pytest.skip("No Finviz stocks matched the gatekeeper gap filter today")

    assert "Ticker" in frame.columns
    assert len(frame) >= 1
def test_gap_candidates_tool_live():
    """Invoke the Finviz-backed gap-candidates tool live and check its output.

    The result must be the ranked list, the exact "no match" message, or the
    Finviz-error message, and must never report an invalid filter.
    """
    from tradingagents.agents.utils.scanner_tools import get_gap_candidates

    result = get_gap_candidates.invoke({})
    assert isinstance(result, str)

    accepted_prefixes = (
        "Top 5 stocks for gatekeeper_gap:",
        "Smart money scan unavailable (Finviz error):",
    )
    no_match_message = "No stocks matched the gatekeeper_gap criteria today."
    assert result.startswith(accepted_prefixes) or result == no_match_message

    assert "Invalid filter" not in result

View File

@ -196,6 +196,39 @@ class TestYfinanceScannerGapCandidates:
assert "No stocks matched the live gap criteria today." in result
class TestYfinanceScannerGatekeeperUniverse:
    """Offline tests for get_gatekeeper_universe_yfinance."""

    def _quote(self, symbol, exchange="NMS", price=25.0, avg_volume=3_000_000, cur_volume=4_000_000, market_cap=5_000_000_000):
        """Build one screener quote dict with the fields the formatter reads."""
        quote = {
            "symbol": symbol,
            "shortName": f"{symbol} Inc",
            "exchange": exchange,
            "regularMarketPrice": price,
            "averageDailyVolume3Month": avg_volume,
            "regularMarketVolume": cur_volume,
            "marketCap": market_cap,
        }
        return quote

    def test_returns_gatekeeper_table(self):
        """A non-empty screen result renders the table header and the symbols."""
        from tradingagents.dataflows.yfinance_scanner import get_gatekeeper_universe_yfinance

        fake_screen = {
            "quotes": [
                self._quote("NVDA"),
                self._quote("AAPL", exchange="NYQ"),
            ]
        }
        # Patch yf.screen where the scanner module looks it up.
        with patch("tradingagents.dataflows.yfinance_scanner.yf.screen", return_value=fake_screen):
            output = get_gatekeeper_universe_yfinance(limit=10)

        assert "# Gatekeeper Universe" in output
        assert "NVDA" in output

    def test_returns_no_match_message_when_empty(self):
        """An empty quotes list yields the exact 'no match' message."""
        from tradingagents.dataflows.yfinance_scanner import get_gatekeeper_universe_yfinance

        empty_screen = {"quotes": []}
        with patch("tradingagents.dataflows.yfinance_scanner.yf.screen", return_value=empty_screen):
            output = get_gatekeeper_universe_yfinance(limit=10)

        assert output == "No stocks matched the gatekeeper universe today."
# ---------------------------------------------------------------------------
# yfinance scanner — get_market_indices_yfinance
# ---------------------------------------------------------------------------
@ -858,3 +891,12 @@ class TestFinvizSmartMoneyTools:
nvda_pos = result.find("NVDA")
amd_pos = result.find("AMD")
assert nvda_pos < amd_pos, "NVDA (higher volume) should appear before AMD"
def test_get_gap_candidates_uses_gatekeeper_gap_label(self):
    """The gap-candidates tool output carries the 'gatekeeper_gap' label."""
    from tradingagents.agents.utils.scanner_tools import get_gap_candidates

    # Replace the Finviz Overview class with the shared mock built from a canned frame.
    overview_cls = self._mock_overview(_make_finviz_df())
    with patch("finvizfinance.screener.overview.Overview", overview_cls):
        output = get_gap_candidates.invoke({})

    assert "gatekeeper_gap" in output

View File

@ -40,15 +40,43 @@ def get_market_indices() -> str:
@tool
def get_gap_candidates() -> str:
def get_gatekeeper_universe() -> str:
"""
Get a bounded set of real gap-up candidates derived from live market data.
Uses the configured scanner_data vendor, but currently relies on yfinance.
Get the bounded stock universe used for downstream discovery.
Uses the configured scanner_data vendor and currently relies on yfinance's
equity screener with the following hardcoded constraints:
- US-listed stocks
- market cap >= $2B
- positive net margin
- average daily volume > 2M
- price > $5
Returns:
str: Formatted table of gap candidates with gap %, price change, and relative volume
str: Formatted table of gatekeeper-universe candidates
"""
return route_to_vendor("get_gap_candidates")
return route_to_vendor("get_gatekeeper_universe")
@tool
def get_gap_candidates() -> str:
    """
    Get the Finviz gap-up subset of the gatekeeper universe.
    Hardcoded to the exact gatekeeper filter plus Gap Up 5%, so the model
    cannot hallucinate Finviz filter names or options.
    Returns:
        str: Formatted list of Finviz gap candidates
    """
    # NOTE(review): docstring text is kept verbatim. If `tool` is LangChain's
    # decorator, the docstring doubles as the tool description — confirm
    # before rewording it.
    gatekeeper_gap_filters = {
        "Market Cap.": "+Mid (over $2bln)",
        "Net Profit Margin": "Positive (>0%)",
        "Average Volume": "Over 2M",
        "Price": "Over $5",
        "Gap": "Up 5%",
    }
    return _run_finviz_screen(gatekeeper_gap_filters, label="gatekeeper_gap")
@tool

View File

@ -12,6 +12,7 @@ from .y_finance import (
)
from .yfinance_news import get_news_yfinance, get_global_news_yfinance
from .yfinance_scanner import (
get_gatekeeper_universe_yfinance,
get_market_movers_yfinance,
get_gap_candidates_yfinance,
get_market_indices_yfinance,
@ -89,6 +90,7 @@ TOOLS_CATEGORIES = {
"scanner_data": {
"description": "Market-wide scanner data (movers, indices, sectors, industries)",
"tools": [
"get_gatekeeper_universe",
"get_market_movers",
"get_gap_candidates",
"get_market_indices",
@ -171,6 +173,9 @@ VENDOR_METHODS = {
"yfinance": get_market_movers_yfinance,
"alpha_vantage": get_market_movers_alpha_vantage,
},
"get_gatekeeper_universe": {
"yfinance": get_gatekeeper_universe_yfinance,
},
"get_gap_candidates": {
"yfinance": get_gap_candidates_yfinance,
},

View File

@ -1,9 +1,12 @@
"""yfinance-based scanner data fetching functions for market-wide analysis."""
import yfinance as yf
import requests
from datetime import datetime
from typing import Annotated
import requests
import yfinance as yf
from yfinance import EquityQuery
from .finnhub_common import ThirdPartyTimeoutError
@ -192,6 +195,78 @@ def get_gap_candidates_yfinance() -> str:
return f"Error fetching live gap candidates: {str(e)}"
def get_gatekeeper_universe_yfinance(limit: int = 25) -> str:
    """
    Build the bounded stock universe for downstream scanners using yfinance's
    equity screener.

    Mirrors the intended Finviz gatekeeper economics as closely as Yahoo's
    query model allows:
    - US listed equities only (exchange codes NMS, NYQ, ASE)
    - market cap >= $2B
    - positive trailing-twelve-month net income margin
    - average daily volume (3M) > 2M
    - price > $5

    Results are requested sorted by day volume, descending.

    Args:
        limit: Maximum number of rows to request and render. Values below 1
            are treated as 1; the screener request itself is capped at 250.

    Returns:
        Markdown table of the gatekeeper universe candidates, the message
        "No stocks matched the gatekeeper universe today." when the screener
        yields no quotes, or an "Error fetching gatekeeper universe: ..."
        string for any other failure.

    Raises:
        ThirdPartyTimeoutError: If the underlying request times out.
    """
    # Use one clamped value for both the request size and the rendered slice,
    # so limit <= 0 cannot produce a header-only or tail-trimmed table.
    row_limit = max(limit, 1)
    # yfinance rejects screen sizes above Yahoo's 250-row maximum.
    screen_size = min(row_limit, 250)

    try:
        query = EquityQuery(
            "and",
            [
                EquityQuery("is-in", ["exchange", "NMS", "NYQ", "ASE"]),
                EquityQuery("gte", ["intradaymarketcap", 2_000_000_000]),
                EquityQuery("gt", ["netincomemargin.lasttwelvemonths", 0]),
                EquityQuery("gt", ["avgdailyvol3m", 2_000_000]),
                EquityQuery("gt", ["intradayprice", 5]),
            ],
        )
        data = yf.screen(query, size=screen_size, sortField="dayvolume", sortAsc=False)
        if not data or not isinstance(data, dict):
            return "No stocks matched the gatekeeper universe today."

        quotes = data.get("quotes", [])
        if not quotes:
            return "No stocks matched the gatekeeper universe today."

        header = "# Gatekeeper Universe\n"
        header += "# Filters: US-listed, market cap >= $2B, positive net margin, avg volume > 2M, price > $5\n"
        header += f"# Data retrieved on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"

        lines = [
            header,
            "| Symbol | Name | Exchange | Price | Avg Vol 3M | Current Vol | Market Cap |",
            "|--------|------|----------|-------|------------|-------------|------------|",
        ]

        for quote in quotes[:row_limit]:
            # `or` fallbacks cover keys that are present but None/empty, which
            # dict.get defaults do not; a None name would otherwise make the
            # slice below raise and discard the whole table.
            symbol = quote.get("symbol") or "N/A"
            name = str(quote.get("shortName") or quote.get("longName") or "N/A")
            exchange = quote.get("exchange") or "N/A"
            price = quote.get("regularMarketPrice")
            avg_vol = quote.get("averageDailyVolume3Month")
            cur_vol = quote.get("regularMarketVolume")
            market_cap = quote.get("marketCap")

            price_str = f"${price:.2f}" if isinstance(price, (int, float)) else "N/A"
            avg_vol_str = f"{avg_vol:,.0f}" if isinstance(avg_vol, (int, float)) else "N/A"
            cur_vol_str = f"{cur_vol:,.0f}" if isinstance(cur_vol, (int, float)) else "N/A"
            market_cap_str = f"${market_cap:,.0f}" if isinstance(market_cap, (int, float)) else "N/A"

            lines.append(
                f"| {symbol} | {name[:30]} | {exchange} | {price_str} | {avg_vol_str} | {cur_vol_str} | {market_cap_str} |"
            )

        return "\n".join(lines) + "\n"

    except requests.exceptions.Timeout as exc:
        # Keep the original timeout as the cause for easier debugging.
        raise ThirdPartyTimeoutError("Request timed out fetching gatekeeper universe") from exc
    except ThirdPartyTimeoutError:
        raise
    except Exception as e:
        # Deliberate best-effort: callers get an error string, not an exception.
        return f"Error fetching gatekeeper universe: {str(e)}"
def get_market_indices_yfinance() -> str:
"""
Get major market indices data.

View File

@ -127,8 +127,6 @@ DEFAULT_CONFIG = {
"tool_vendors": {
# Finnhub free tier provides same data + MSPR aggregate bonus signal
"get_insider_transactions": "finnhub",
# First implementation is yfinance-only until another vendor is validated.
"get_gap_candidates": "yfinance",
},
# Report storage backend
# When mongo_uri is set, reports are persisted in MongoDB (never overwritten).