Unblock PR review by removing portability and secret-handling regressions

The open review threads on this branch were all grounded in real issues:
a committed API key in handover docs, Unix-only locking and timeout
mechanisms, synchronous network I/O inside an async API path, and missing
retry/session reuse on market-data calls. This change removes the leaked
credential from the tracked docs, makes the portfolio and profiling paths
portable across platforms, moves live price fetches off the event loop,
and reuses the existing yfinance retry/session helpers where the review
called for them.

While verifying these fixes, the branch also failed to import parts of the
TradingAgents graph because two utility modules referenced by the new code
were absent. I restored those utilities with minimal implementations so the
relevant regression tests and import graph work again in this PR.

Constraint: No new dependencies; portability fixes had to stay in the standard library
Rejected: Add portalocker or filelock | unnecessary new dependency for a small compatibility gap
Rejected: Keep signal.alarm and fcntl as Unix-only behavior | leaves the reported review blockers unresolved
Confidence: medium
Scope-risk: moderate
Reversibility: clean
Directive: Keep shared runtime paths cross-platform and keep async handlers free of direct blocking network I/O
Tested: python -m pytest -q web_dashboard/backend/tests/test_portfolio_api.py orchestrator/tests/test_quant_runner.py orchestrator/tests/test_profile_stage_chain.py tradingagents/tests/test_stockstats_utils.py
Tested: python -m pytest -q orchestrator/tests/test_trading_graph_config.py tradingagents/tests/test_research_guard.py
Not-tested: Full repository test suite and GitHub-side post-push checks
This commit is contained in:
陈少杰 2026-04-17 10:50:47 +08:00
parent e581adbeca
commit 4f88c4c6c2
11 changed files with 401 additions and 51 deletions

View File

@ -82,6 +82,6 @@ python run_ningde.py # 宁德时代
## API配置
- API Key: Read from a local environment variable; do not commit secrets
- API Key: 从本地环境变量读取(不要提交到仓库)
- Base URL: `https://api.minimaxi.com/anthropic`
- Model: `MiniMax-M2.7-highspeed`
- Model: `MiniMax-M2.7-highspeed`

View File

@ -1,10 +1,12 @@
from __future__ import annotations
import _thread
import argparse
import json
import signal
import threading
import time
from collections import defaultdict
from contextlib import contextmanager
from datetime import datetime, timezone
from pathlib import Path
@ -58,6 +60,27 @@ class _ProfileTimeout(Exception):
pass
@contextmanager
def _overall_timeout_guard(seconds: int):
timed_out = threading.Event()
timer: threading.Timer | None = None
def interrupt_main() -> None:
timed_out.set()
_thread.interrupt_main()
if seconds > 0:
timer = threading.Timer(seconds, interrupt_main)
timer.daemon = True
timer.start()
try:
yield timed_out
finally:
if timer is not None:
timer.cancel()
def _jsonable(value):
if isinstance(value, (str, int, float, bool)) or value is None:
return value
@ -121,6 +144,8 @@ def build_trace_payload(
if exception_type is not None:
payload["exception_type"] = exception_type
return payload
def main() -> None:
args = build_parser().parse_args()
selected_analysts = [item.strip() for item in args.selected_analysts.split(",") if item.strip()]
@ -151,40 +176,40 @@ def main() -> None:
dump_dir.mkdir(parents=True, exist_ok=True)
dump_path = dump_dir / f"{args.ticker.replace('/', '_')}_{args.date}_{run_id}.json"
def alarm_handler(signum, frame):
raise _ProfileTimeout(f"profiling timeout after {args.overall_timeout}s")
signal.signal(signal.SIGALRM, alarm_handler)
signal.alarm(args.overall_timeout)
try:
for event in graph.graph.stream(state, stream_mode="updates", config=config_kwargs):
now = time.monotonic()
nodes = list(event.keys())
phases = sorted({_PHASE_MAP.get(node, "unknown") for node in nodes})
llm_kinds = sorted({_LLM_KIND_MAP.get(node, "unknown") for node in nodes})
delta = round(now - last_at, 3)
research_status, degraded_reason, history_len, response_len = _extract_research_state(event)
entry = {
"run_id": run_id,
"nodes": nodes,
"phases": phases,
"llm_kinds": llm_kinds,
"start_at": round(last_at - started_at, 3),
"end_at": round(now - started_at, 3),
"elapsed_ms": int(delta * 1000),
"selected_analysts": selected_analysts,
"analysis_prompt_style": args.analysis_prompt_style,
"research_status": research_status,
"degraded_reason": degraded_reason,
"history_len": history_len,
"response_len": response_len,
}
node_timings.append(entry)
raw_events.append(_jsonable(event))
for phase in phases:
phase_totals[phase] += delta
last_at = now
with _overall_timeout_guard(args.overall_timeout) as timed_out:
try:
for event in graph.graph.stream(state, stream_mode="updates", config=config_kwargs):
now = time.monotonic()
nodes = list(event.keys())
phases = sorted({_PHASE_MAP.get(node, "unknown") for node in nodes})
llm_kinds = sorted({_LLM_KIND_MAP.get(node, "unknown") for node in nodes})
delta = round(now - last_at, 3)
research_status, degraded_reason, history_len, response_len = _extract_research_state(event)
entry = {
"run_id": run_id,
"nodes": nodes,
"phases": phases,
"llm_kinds": llm_kinds,
"start_at": round(last_at - started_at, 3),
"end_at": round(now - started_at, 3),
"elapsed_ms": int(delta * 1000),
"selected_analysts": selected_analysts,
"analysis_prompt_style": args.analysis_prompt_style,
"research_status": research_status,
"degraded_reason": degraded_reason,
"history_len": history_len,
"response_len": response_len,
}
node_timings.append(entry)
raw_events.append(_jsonable(event))
for phase in phases:
phase_totals[phase] += delta
last_at = now
except KeyboardInterrupt:
if timed_out.is_set():
raise _ProfileTimeout(f"profiling timeout after {args.overall_timeout}s") from None
raise
payload = {
"status": "ok",
@ -212,8 +237,6 @@ def main() -> None:
"dump_path": str(dump_path),
"raw_events": raw_events,
}
finally:
signal.alarm(0)
dump_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2))
print(json.dumps(payload, ensure_ascii=False, indent=2))

View File

@ -12,6 +12,7 @@ from orchestrator.config import OrchestratorConfig
from orchestrator.contracts.error_taxonomy import ReasonCode
from orchestrator.contracts.result_contract import Signal, build_error_signal
from orchestrator.market_calendar import is_non_trading_day
from tradingagents.dataflows.stockstats_utils import yf_retry
logger = logging.getLogger(__name__)
@ -48,7 +49,15 @@ class QuantRunner:
start_str = start_dt.strftime("%Y-%m-%d")
end_exclusive = (end_dt + timedelta(days=1)).strftime("%Y-%m-%d")
df = yf.download(ticker, start=start_str, end=end_exclusive, progress=False, auto_adjust=True)
df = yf_retry(
lambda: yf.download(
ticker,
start=start_str,
end=end_exclusive,
progress=False,
auto_adjust=True,
)
)
if df.empty:
logger.warning("No price data for %s between %s and %s", ticker, start_str, date)
if is_non_trading_day(ticker, end_dt.date()):

View File

@ -1,4 +1,5 @@
import json
from contextlib import contextmanager
from datetime import datetime as real_datetime, timezone
from pathlib import Path
@ -95,9 +96,13 @@ def test_main_writes_trace_payload_with_research_provenance(monkeypatch, tmp_pat
monkeypatch.setattr(profile_stage_chain, "TradingAgentsGraph", _FakeTradingAgentsGraph)
monkeypatch.setattr(profile_stage_chain, "Propagator", _FakePropagator)
monkeypatch.setattr(profile_stage_chain.time, "monotonic", lambda: next(monotonic_points))
monkeypatch.setattr(profile_stage_chain.signal, "signal", lambda *args, **kwargs: None)
monkeypatch.setattr(profile_stage_chain.signal, "alarm", lambda *args, **kwargs: None)
monkeypatch.setattr(profile_stage_chain, "datetime", _FixedDateTime)
@contextmanager
def fake_guard(_seconds):
yield profile_stage_chain.threading.Event()
monkeypatch.setattr(profile_stage_chain, "_overall_timeout_guard", fake_guard)
monkeypatch.setattr(
"sys.argv",
[
@ -161,3 +166,51 @@ def test_main_writes_trace_payload_with_research_provenance(monkeypatch, tmp_pat
dump_path = Path(output["dump_path"])
assert dump_path.exists()
assert json.loads(dump_path.read_text()) == output
class _KeyboardInterruptGraph:
    """Test double for TradingAgentsGraph whose stream raises KeyboardInterrupt."""

    def __init__(self, *, selected_analysts, config):
        # main() drills into graph.graph.stream(...), so point .graph at self.
        self.graph = self

    def stream(self, state, stream_mode, config):
        # The unreachable `yield` turns this into a generator function: the
        # KeyboardInterrupt fires on first iteration (inside main()'s for-loop),
        # not at call time, mirroring where a real timeout interrupt would land.
        raise KeyboardInterrupt
        yield
def test_main_reports_cross_platform_timeout(monkeypatch, tmp_path, capsys):
    """main() must convert a timeout-driven KeyboardInterrupt into a _ProfileTimeout payload."""
    monkeypatch.setattr(profile_stage_chain, "TradingAgentsGraph", _KeyboardInterruptGraph)
    monkeypatch.setattr(profile_stage_chain, "Propagator", _FakePropagator)
    monkeypatch.setattr(profile_stage_chain, "datetime", _FixedDateTime)

    @contextmanager
    def timed_out_guard(seconds):
        # Simulate the watchdog having fired: the event is pre-set, so the
        # KeyboardInterrupt raised by the fake graph is classified as a timeout
        # rather than a user interrupt.
        event = profile_stage_chain.threading.Event()
        event.set()
        yield event

    monkeypatch.setattr(profile_stage_chain, "_overall_timeout_guard", timed_out_guard)
    monkeypatch.setattr(
        "sys.argv",
        [
            "profile_stage_chain.py",
            "--ticker",
            "AAPL",
            "--date",
            "2026-04-11",
            "--selected-analysts",
            "market,social",
            "--analysis-prompt-style",
            "balanced",
            "--overall-timeout",
            "1",
            "--dump-dir",
            str(tmp_path),
        ],
    )
    profile_stage_chain.main()

    # main() prints the trace payload to stdout; these fields prove the
    # cross-platform timeout branch produced the error report.
    output = json.loads(capsys.readouterr().out)
    assert output["status"] == "error"
    assert output["exception_type"] == "_ProfileTimeout"
    assert output["error"] == "profiling timeout after 1s"

View File

@ -183,3 +183,19 @@ def test_get_signal_marks_partial_data_when_required_columns_missing(runner, mon
assert signal.degraded is True
assert signal.reason_code == ReasonCode.PARTIAL_DATA.value
assert signal.metadata["data_quality"]["state"] == "partial_data"
def test_get_signal_uses_yf_retry_wrapper(runner, monkeypatch):
    """get_signal must route its yfinance download through yf_retry."""
    calls = []

    def fake_retry(func, max_retries=3, base_delay=2.0):
        # Record the retry parameters seen. The production call passes only
        # the download callable, so the recorded values are this fake's own
        # defaults - asserting (3, 2.0) below proves the runner did not
        # override them (and that yf_retry was actually invoked).
        calls.append((max_retries, base_delay))
        return pd.DataFrame()

    monkeypatch.setattr("orchestrator.quant_runner.yf_retry", fake_retry)
    monkeypatch.setattr("orchestrator.quant_runner.is_non_trading_day", lambda *_args, **_kwargs: False)

    signal = runner.get_signal("AAPL", "2024-01-02")

    # An empty frame on a trading day should surface QUANT_NO_DATA.
    assert calls == [(3, 2.0)]
    assert signal.reason_code == ReasonCode.QUANT_NO_DATA.value

View File

@ -0,0 +1,69 @@
from __future__ import annotations
import re
from typing import Any, Iterable
CANONICAL_RATINGS = ("BUY", "OVERWEIGHT", "HOLD", "UNDERWEIGHT", "SELL")

# Word-bounded, case-insensitive match for any canonical rating token.
_RATING_PATTERN = re.compile(
    r"\b(" + "|".join(CANONICAL_RATINGS) + r")\b",
    re.IGNORECASE,
)


def extract_rating(text: str) -> str | None:
    """Return the first canonical rating mentioned in *text* (uppercased), or None."""
    found = _RATING_PATTERN.search(str(text or ""))
    return found.group(1).upper() if found else None


def _normalize_report_text(rating: str, rating_source: str, report_text: str) -> str:
    """Prepend a normalized decision header to *report_text* (or a placeholder)."""
    narrative = str(report_text or "").strip()
    if not narrative:
        narrative = "No narrative provided."
    header = [
        "## Normalized Portfolio Decision",
        f"- Rating: {rating}",
        f"- Rating Source: {rating_source}",
    ]
    return "\n".join(header) + "\n\n" + narrative


def build_structured_decision(
    text: str,
    *,
    fallback_candidates: Iterable[tuple[str, str]] = (),
    default_rating: str = "HOLD",
    peer_context_mode: str = "UNSPECIFIED",
    context_usage: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Normalize free-form decision text into a structured decision dict.

    Resolution order: a rating found directly in *text*, then the first
    fallback candidate whose text yields one, then *default_rating*.
    Every non-direct resolution appends a machine-readable warning tag.
    """
    warnings: list[str] = []
    chosen_source = "direct"
    chosen_text = str(text or "")
    chosen_rating = extract_rating(text)

    if chosen_rating is None:
        # Walk the fallbacks in order; the first hit wins and is recorded.
        for name, candidate in fallback_candidates:
            inferred = extract_rating(candidate)
            if inferred is None:
                continue
            chosen_rating = inferred
            chosen_source = name
            chosen_text = str(candidate or "")
            warnings.append(f"rating_inferred_from:{name}")
            break

    if chosen_rating is None:
        chosen_rating = str(default_rating or "HOLD").upper()
        chosen_source = "default"
        warnings.append("rating_defaulted")

    usage = context_usage or {}
    return {
        "rating": chosen_rating,
        # HOLD decisions carry an as-yet unspecified subtype; others do not apply.
        "hold_subtype": "UNSPECIFIED" if chosen_rating == "HOLD" else "N/A",
        "rating_source": chosen_source,
        "report_text": _normalize_report_text(chosen_rating, chosen_source, chosen_text),
        "warnings": warnings,
        "portfolio_context_used": bool(usage.get("portfolio_context")),
        "peer_context_used": bool(usage.get("peer_context")),
        "peer_context_mode": str(peer_context_mode or "UNSPECIFIED"),
    }

View File

@ -0,0 +1,99 @@
from __future__ import annotations
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError
from typing import Any
def _invoke_dimension(llm, dimension: str, prompt: str) -> dict[str, Any]:
    """Run one subagent prompt synchronously and wrap the outcome in a result dict.

    Never raises: any exception from llm.invoke is folded into the result so a
    single failing dimension cannot abort the parallel fan-out.
    """
    started_at = time.monotonic()
    try:
        response = llm.invoke(prompt)
        # Message-style responses expose .content; fall back to str() otherwise.
        content = response.content if hasattr(response, "content") else str(response)
        return {
            "dimension": dimension,
            "content": str(content).strip(),
            "ok": True,
            "error": None,
            "elapsed_s": round(time.monotonic() - started_at, 3),
        }
    except Exception as exc:
        return {
            "dimension": dimension,
            "content": "",
            "ok": False,
            "error": str(exc),
            "elapsed_s": round(time.monotonic() - started_at, 3),
        }


def run_parallel_subagents(
    *,
    llm,
    dimension_configs: list[dict[str, Any]],
    timeout_per_subagent: float = 25.0,
    max_workers: int = 4,
) -> list[dict[str, Any]]:
    """Fan out dimension prompts to a thread pool and collect per-dimension results.

    Results are returned in the order of *dimension_configs*. A dimension that
    exceeds its time budget yields an ``ok=False`` entry with ``error="timeout"``.

    The budget is enforced against a single shared deadline captured when
    collection begins. Waiting on each future with a fresh
    ``timeout_per_subagent`` would let the k-th future run for up to
    k x timeout before being flagged (the waits are sequential), so each
    result is awaited only for the time remaining until the common deadline.
    Assumes max_workers roughly covers the configs; tasks queued behind busy
    workers share the same deadline.
    """
    if not dimension_configs:
        return []
    executor = ThreadPoolExecutor(max_workers=max_workers)
    futures = {
        executor.submit(_invoke_dimension, llm, cfg["dimension"], cfg["prompt"]): cfg["dimension"]
        for cfg in dimension_configs
    }
    deadline = time.monotonic() + timeout_per_subagent
    results: list[dict[str, Any]] = []
    try:
        # dict preserves insertion order, so results match submission order.
        for future, dimension in futures.items():
            try:
                remaining = max(0.0, deadline - time.monotonic())
                results.append(future.result(timeout=remaining))
            except TimeoutError:
                results.append(
                    {
                        "dimension": dimension,
                        "content": "",
                        "ok": False,
                        "error": "timeout",
                        "elapsed_s": round(timeout_per_subagent, 3),
                    }
                )
    finally:
        # Do not block on stragglers; cancel anything still queued.
        executor.shutdown(wait=False, cancel_futures=True)
    return results
def synthesize_subagent_results(
    subagent_results: list[dict[str, Any]],
    *,
    max_chars_per_result: int = 200,
) -> tuple[str, dict[str, Any]]:
    """Merge per-dimension subagent results into one report plus a metadata dict.

    Failed dimensions render as ``[UNAVAILABLE: <reason>]`` and are recorded in
    the returned failures map; long sections are truncated to
    *max_chars_per_result* characters with a trailing ellipsis.
    """
    sections: list[str] = []
    timings: dict[str, float] = {}
    failures: dict[str, str] = {}
    for item in subagent_results:
        name = str(item.get("dimension") or "unknown")
        timings[name] = float(item.get("elapsed_s") or 0.0)
        body = str(item.get("content") or "").strip()
        if not item.get("ok"):
            reason = str(item.get("error") or "unknown error")
            failures[name] = reason
            body = f"[UNAVAILABLE: {reason}]"
        if len(body) > max_chars_per_result:
            # Reserve three characters for the ellipsis marker.
            body = f"{body[:max_chars_per_result - 3]}..."
        sections.append(f"[{name.upper()}]\n{body or '[NO OUTPUT]'}")
    metadata = {
        "subagent_timings": timings,
        "failures": failures,
    }
    return "\n\n".join(sections), metadata

View File

@ -1,5 +1,6 @@
import time
import logging
import threading
import pandas as pd
import yfinance as yf
@ -11,6 +12,16 @@ import os
from .config import get_config
logger = logging.getLogger(__name__)
_fallback_session_local = threading.local()
def _get_fallback_session() -> requests.Session:
    """Return this thread's cached fallback HTTP session, creating it on first use.

    The session lives in a threading.local so concurrent callers never share a
    Session object, and trust_env=False keeps environment proxy settings from
    interfering with the fallback endpoints.
    """
    cached = getattr(_fallback_session_local, "session", None)
    if cached is not None:
        return cached
    fresh = requests.Session()
    fresh.trust_env = False
    _fallback_session_local.session = fresh
    return fresh
def _symbol_to_tencent_code(symbol: str) -> str:
@ -24,8 +35,7 @@ def _symbol_to_tencent_code(symbol: str) -> str:
def _fetch_tencent_ohlcv(symbol: str, start_date: str, end_date: str) -> pd.DataFrame:
"""Fallback daily OHLCV fetch for A-shares via Tencent."""
session = requests.Session()
session.trust_env = False
session = _get_fallback_session()
response = session.get(
"https://web.ifzq.gtimg.cn/appstock/app/fqkline/get",
params={
@ -72,8 +82,7 @@ def _symbol_to_eastmoney_secid(symbol: str) -> str:
def _fetch_eastmoney_ohlcv(symbol: str, start_date: str, end_date: str) -> pd.DataFrame:
"""Fallback daily OHLCV fetch for A-shares via Eastmoney."""
session = requests.Session()
session.trust_env = False
session = _get_fallback_session()
url = "https://push2his.eastmoney.com/api/qt/stock/kline/get"
response = session.get(
url,

View File

@ -0,0 +1,22 @@
import threading
from tradingagents.dataflows import stockstats_utils
def test_get_fallback_session_reuses_session_in_same_thread(monkeypatch):
    """_get_fallback_session must cache one session per thread and disable trust_env."""
    created = []

    class FakeSession:
        def __init__(self):
            # Starts True so the assertion below proves the helper flips it off.
            self.trust_env = True
            created.append(self)

    # Fresh thread-local storage so earlier tests cannot leak a cached session in.
    monkeypatch.setattr(stockstats_utils, "_fallback_session_local", threading.local())
    monkeypatch.setattr(stockstats_utils.requests, "Session", FakeSession)

    first = stockstats_utils._get_fallback_session()
    second = stockstats_utils._get_fallback_session()

    # Same object both calls, constructed exactly once, env proxies disabled.
    assert first is second
    assert len(created) == 1
    assert first.trust_env is False

View File

@ -2,8 +2,8 @@
Portfolio API 自选股持仓每日建议
"""
import asyncio
import fcntl
import json
import os
import uuid
from datetime import datetime
from pathlib import Path
@ -11,6 +11,34 @@ from typing import Optional
import yfinance
try:
    import fcntl
except ImportError:  # pragma: no cover - exercised on Windows
    import msvcrt

    class _FcntlCompat:
        """Minimal flock() shim over msvcrt so file locking works on Windows.

        Only the subset this module uses is emulated. Known limitations:
        - LOCK_SH degrades to an exclusive lock (msvcrt offers no shared
          locks) -- NOTE(review): confirm concurrent readers tolerate this.
        - Only the first byte is locked; sufficient here because every caller
          locks whole files through this same shim.
        """

        # Constant values mirror the Unix fcntl module.
        LOCK_SH = 1
        LOCK_EX = 2
        LOCK_UN = 8

        @staticmethod
        def flock(fd: int, operation: int) -> None:
            # msvcrt.locking acts at the current file position; always target
            # byte 0 so lock and unlock hit the same region.
            os.lseek(fd, 0, os.SEEK_SET)
            if operation == _FcntlCompat.LOCK_UN:
                try:
                    msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
                except OSError:
                    # Unlocking an unlocked region raises on Windows; swallow
                    # it to match flock(LOCK_UN) no-op semantics.
                    return
                return
            if os.fstat(fd).st_size == 0:
                # locking() cannot lock past EOF; seed one byte so a freshly
                # created file can be locked at offset 0.
                os.write(fd, b"\0")
                os.lseek(fd, 0, os.SEEK_SET)
            # NOTE(review): LK_LOCK retries for ~10s then raises OSError,
            # unlike an indefinitely blocking flock(LOCK_EX) -- confirm
            # callers handle that failure mode.
            msvcrt.locking(fd, msvcrt.LK_LOCK, 1)

    fcntl = _FcntlCompat()
# Data directory
DATA_DIR = Path(__file__).parent.parent.parent / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)
@ -153,7 +181,7 @@ def _fetch_price(ticker: str) -> float | None:
async def _fetch_price_throttled(ticker: str) -> float | None:
"""Fetch price with semaphore throttling."""
async with _yfinance_semaphore:
return _fetch_price(ticker)
return await asyncio.to_thread(_fetch_price, ticker)
async def get_positions(account: Optional[str] = None) -> list:

View File

@ -1,12 +1,9 @@
"""
Tests for portfolio API covers critical security and correctness fixes.
"""
import asyncio
import json
import os
import tempfile
import pytest
from pathlib import Path
from unittest.mock import patch
class TestRemovePositionMassDeletion:
@ -261,3 +258,28 @@ class TestConstants:
assert "MAX_CONCURRENT_YFINANCE_REQUESTS" in content
assert "asyncio.Semaphore(MAX_CONCURRENT_YFINANCE_REQUESTS)" in content
def test_portfolio_locking_has_windows_fallback(self):
    """Static source check: portfolio.py must guard its fcntl import for Windows."""
    portfolio_path = Path(__file__).parent.parent / "api" / "portfolio.py"
    content = portfolio_path.read_text()
    # Assert on the source text rather than importing, so the check passes on
    # any platform regardless of which locking primitive is available.
    assert "except ImportError" in content
    assert "msvcrt" in content
class TestAsyncPriceFetch:
    """The throttled price fetch must not run blocking yfinance I/O on the event loop."""

    def test_fetch_price_throttled_uses_worker_thread(self, monkeypatch):
        from api import portfolio

        calls = []

        async def fake_to_thread(func, *args):
            # Capture exactly what would be shipped off to the worker thread.
            calls.append((func, args))
            return 321.0

        monkeypatch.setattr(portfolio.asyncio, "to_thread", fake_to_thread)

        result = asyncio.run(portfolio._fetch_price_throttled("AAPL"))

        assert result == 321.0
        # The synchronous _fetch_price must be handed to to_thread, not called inline.
        assert calls == [(portfolio._fetch_price, ("AAPL",))]