Unify research provenance extraction and persist it into state logs

The earlier Phase 1-4 recovery left one unique worker-1 slice unrecovered: provenance extraction logic was still duplicated in the runner and the full-state log path still dropped the structured research fields. This change centralizes provenance extraction in agent state helpers, reuses it from the LLM runner, and writes the same structured fields into TradingAgents full-state logs with focused regression tests.

Constraint: Preserve the existing debate-string output shape while making provenance reuse consistent across runner and state-log surfaces
Rejected: Cherry-pick worker-1 auto-checkpoint wholesale | it mixed duplicate A/B files and uv.lock churn with the useful provenance helper changes
Confidence: high
Scope-risk: narrow
Directive: Keep research provenance extraction centralized; new consumers should call the helper instead of re-listing field names by hand
Tested: python -m pytest -q tradingagents/tests/test_research_guard.py orchestrator/tests/test_trading_graph_config.py orchestrator/tests/test_llm_runner.py orchestrator/tests/test_profile_stage_chain.py orchestrator/tests/test_profile_ab.py orchestrator/tests/test_contract_v1alpha1.py orchestrator/tests/test_live_mode.py
Tested: python -m compileall tradingagents/agents/utils/agent_states.py tradingagents/graph/trading_graph.py orchestrator/llm_runner.py orchestrator/tests/test_trading_graph_config.py tradingagents/tests/test_research_guard.py
Not-tested: Live-provider end-to-end analysis run that emits a new full_states_log file
This commit is contained in:
陈少杰 2026-04-14 13:34:25 +08:00
parent 8c6da22f4f
commit 64e3583f66
5 changed files with 113 additions and 14 deletions

View File

@ -6,6 +6,7 @@ from datetime import datetime, timezone
from orchestrator.config import OrchestratorConfig
from orchestrator.contracts.error_taxonomy import ReasonCode
from orchestrator.contracts.result_contract import Signal, build_error_signal
from tradingagents.agents.utils.agent_states import extract_research_provenance
logger = logging.getLogger(__name__)
@ -20,18 +21,7 @@ def _extract_research_metadata(final_state: dict | None) -> dict | None:
if not isinstance(final_state, dict):
return None
debate_state = final_state.get("investment_debate_state") or {}
if not isinstance(debate_state, dict):
return None
keys = (
"research_status",
"research_mode",
"timed_out_nodes",
"degraded_reason",
"covered_dimensions",
"manager_confidence",
)
metadata = {key: debate_state.get(key) for key in keys if key in debate_state}
return metadata or None
return extract_research_provenance(debate_state)
class LLMRunner:

View File

@ -1,5 +1,7 @@
import json
from tradingagents.default_config import DEFAULT_CONFIG
from tradingagents.graph.trading_graph import _merge_with_default_config
from tradingagents.graph.trading_graph import TradingAgentsGraph, _merge_with_default_config
def test_merge_with_default_config_keeps_required_defaults():
@ -27,3 +29,51 @@ def test_merge_with_default_config_merges_nested_vendor_settings():
assert merged["data_vendors"]["news_data"] == "alpha_vantage"
assert merged["data_vendors"]["core_stock_apis"] == DEFAULT_CONFIG["data_vendors"]["core_stock_apis"]
assert merged["tool_vendors"]["get_stock_data"] == "alpha_vantage"
def test_log_state_persists_research_provenance(tmp_path):
    """Check that ``_log_state`` writes the research provenance fields to disk.

    A graph instance is created without running ``__init__``, a degraded
    research debate state is logged, and the JSON file written under
    ``tmp_path`` is read back to confirm the provenance values survived.
    """
    # Research-stage debate state, including the structured provenance keys.
    research_debate = {
        "bull_history": "Bull Analyst: case",
        "bear_history": "Bear Analyst: case",
        "history": "Bull Analyst: case\nBear Analyst: case",
        "current_response": "Recommendation: HOLD",
        "judge_decision": "Recommendation: HOLD",
        "research_status": "degraded",
        "research_mode": "degraded_synthesis",
        "timed_out_nodes": ["Bull Researcher"],
        "degraded_reason": "bull_researcher_timeout",
        "covered_dimensions": ["market"],
        "manager_confidence": 0.0,
    }
    risk_debate = {
        "aggressive_history": "",
        "conservative_history": "",
        "neutral_history": "",
        "history": "",
        "judge_decision": "",
    }
    final_state = {
        "company_of_interest": "AAPL",
        "trade_date": "2026-04-11",
        "market_report": "",
        "sentiment_report": "",
        "news_report": "",
        "fundamentals_report": "",
        "investment_debate_state": research_debate,
        "trader_investment_plan": "",
        "risk_debate_state": risk_debate,
        "investment_plan": "Recommendation: HOLD",
        "final_trade_decision": "HOLD",
    }

    # __new__ skips __init__; only the attributes assigned here are set up.
    graph = TradingAgentsGraph.__new__(TradingAgentsGraph)
    graph.config = {"results_dir": str(tmp_path)}
    graph.ticker = "AAPL"
    graph.log_states_dict = {}

    TradingAgentsGraph._log_state(graph, "2026-04-11", final_state)

    strategy_log_dir = tmp_path / "AAPL" / "TradingAgentsStrategy_logs"
    log_path = strategy_log_dir / "full_states_log_2026-04-11.json"
    with log_path.open(encoding="utf-8") as handle:
        payload = json.load(handle)

    logged_debate = payload["investment_debate_state"]
    expected_fields = {
        "research_status": "degraded",
        "research_mode": "degraded_synthesis",
        "timed_out_nodes": ["Bull Researcher"],
        "manager_confidence": 0.0,
    }
    for field, expected in expected_fields.items():
        assert logged_debate[field] == expected

View File

@ -1,8 +1,31 @@
from typing import Annotated, Optional
from typing import Annotated, Any, Mapping, Optional
from typing_extensions import NotRequired, TypedDict
from langgraph.graph import MessagesState
RESEARCH_PROVENANCE_FIELDS = (
"research_status",
"research_mode",
"timed_out_nodes",
"degraded_reason",
"covered_dimensions",
"manager_confidence",
)
def extract_research_provenance(
debate_state: Mapping[str, Any] | None,
) -> dict[str, Any] | None:
if not isinstance(debate_state, Mapping):
return None
metadata = {
key: debate_state.get(key)
for key in RESEARCH_PROVENANCE_FIELDS
if key in debate_state
}
return metadata or None
# Researcher team state
class InvestDebateState(TypedDict, total=False):
bull_history: Annotated[

View File

@ -18,6 +18,7 @@ from tradingagents.agents.utils.agent_states import (
AgentState,
InvestDebateState,
RiskDebateState,
extract_research_provenance,
)
from tradingagents.dataflows.config import set_config
@ -285,6 +286,12 @@ class TradingAgentsGraph:
"judge_decision": final_state["investment_debate_state"][
"judge_decision"
],
**(
extract_research_provenance(
final_state.get("investment_debate_state")
)
or {}
),
},
"trader_investment_decision": final_state["trader_investment_plan"],
"risk_debate_state": {

View File

@ -1,5 +1,6 @@
import time
from tradingagents.agents.utils.agent_states import extract_research_provenance
import tradingagents.graph.setup as graph_setup_module
from tradingagents.graph.setup import GraphSetup
@ -207,3 +208,31 @@ def test_guard_timeout_returns_without_waiting_for_node_completion(monkeypatch):
assert debate["research_status"] == "degraded"
assert debate["research_mode"] == "degraded_synthesis"
assert debate["timed_out_nodes"] == ["Bull Researcher"]
def test_extract_research_provenance_returns_subset():
    """Only the provenance keys are returned; unrelated keys are dropped."""
    expected = {
        "research_status": "degraded",
        "research_mode": "degraded_synthesis",
        "timed_out_nodes": ["Bull Researcher"],
        "degraded_reason": "bull_researcher_timeout",
        "covered_dimensions": ["market", "bull"],
        "manager_confidence": 0.0,
    }
    # Same provenance values plus one non-provenance key that must be ignored.
    debate_state = {
        "research_status": "degraded",
        "research_mode": "degraded_synthesis",
        "timed_out_nodes": ["Bull Researcher"],
        "degraded_reason": "bull_researcher_timeout",
        "covered_dimensions": ["market", "bull"],
        "manager_confidence": 0.0,
        "history": "ignored",
    }

    payload = extract_research_provenance(debate_state)

    assert payload == expected
def test_extract_research_provenance_ignores_non_mapping():
    """Inputs that are not mappings (``None``, a string) yield ``None``."""
    for not_a_mapping in (None, "bad"):
        assert extract_research_provenance(not_a_mapping) is None