* feat: add extract_json() utility for robust LLM JSON parsing

  Handles DeepSeek R1 <think> blocks, markdown code fences, and preamble/postamble text that LLMs wrap around JSON output. Applied to macro_synthesis, macro_bridge, and CLI scan output.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: opt-in vendor fallback — fail-fast by default (ADR 011)

  Silent cross-vendor fallback corrupts signal quality when data contracts differ (e.g., AV news has sentiment scores yfinance lacks). Only methods with fungible data contracts (OHLCV, indices, sector/industry perf, market movers) now get fallback. All others raise immediately.

  - Add FALLBACK_ALLOWED whitelist to interface.py
  - Rewrite route_to_vendor() with fail-fast/fallback branching
  - Improve error messages with method name, vendors tried, and exception chaining
  - Add 11 new tests in test_vendor_failfast.py
  - Update ADRs 002 (superseded), 008, 010; create ADR 011

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Initial plan

* fix: address PR #18 review findings - type safety, import ordering, EOF newline

  Co-authored-by: aguzererler <6199053+aguzererler@users.noreply.github.com>

* fix: add inline comments explaining combined ValueError catch in json_utils.py

  Co-authored-by: aguzererler <6199053+aguzererler@users.noreply.github.com>

---------

Co-authored-by: Ahmet Guzererler <guzererler@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: aguzererler <6199053+aguzererler@users.noreply.github.com>
This commit is contained in:
parent
fa8a0d56fb
commit
3b6e399563
|
|
@ -119,13 +119,12 @@ def test_no_json_at_all():
|
||||||
extract_json("Just some text with no JSON structure at all")
|
extract_json("Just some text with no JSON structure at all")
|
||||||
|
|
||||||
|
|
||||||
def test_array_input_returns_list():
|
def test_array_input_raises_value_error():
|
||||||
"""extract_json succeeds on JSON arrays — json.loads parses them as lists.
|
"""extract_json rejects JSON arrays — only dicts are accepted.
|
||||||
|
|
||||||
The function's return-type annotation says dict, but the implementation does
|
All callers (macro_synthesis, macro_bridge, CLI) call .get() on the result,
|
||||||
not enforce this at runtime. A JSON array is valid JSON, so step 1
|
so returning a list would cause AttributeError downstream. The function
|
||||||
(direct json.loads) succeeds and returns a list. Callers that need a dict
|
enforces dict-only return at runtime.
|
||||||
must validate the returned type themselves.
|
|
||||||
"""
|
"""
|
||||||
result = extract_json('[1, 2, 3]')
|
with pytest.raises(ValueError, match="Expected a JSON object"):
|
||||||
assert result == [1, 2, 3]
|
extract_json('[1, 2, 3]')
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||||
|
|
||||||
from tradingagents.agents.utils.json_utils import extract_json
|
from tradingagents.agents.utils.json_utils import extract_json
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
|
||||||
|
|
||||||
|
|
||||||
def create_macro_synthesis(llm):
|
def create_macro_synthesis(llm):
|
||||||
def macro_synthesis_node(state):
|
def macro_synthesis_node(state):
|
||||||
|
|
|
||||||
|
|
@ -30,9 +30,16 @@ def extract_json(text: str) -> dict[str, Any]:
|
||||||
if not text or not text.strip():
|
if not text or not text.strip():
|
||||||
raise ValueError("Empty input — no JSON to extract")
|
raise ValueError("Empty input — no JSON to extract")
|
||||||
|
|
||||||
|
def _ensure_dict(obj: object) -> dict[str, Any]:
|
||||||
|
if not isinstance(obj, dict):
|
||||||
|
raise ValueError(
|
||||||
|
f"Expected a JSON object (dict), got {type(obj).__name__}"
|
||||||
|
)
|
||||||
|
return obj
|
||||||
|
|
||||||
# 1. Direct parse
|
# 1. Direct parse
|
||||||
try:
|
try:
|
||||||
return json.loads(text)
|
return _ensure_dict(json.loads(text))
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -41,7 +48,7 @@ def extract_json(text: str) -> dict[str, Any]:
|
||||||
|
|
||||||
# Try again after stripping think blocks
|
# Try again after stripping think blocks
|
||||||
try:
|
try:
|
||||||
return json.loads(cleaned)
|
return _ensure_dict(json.loads(cleaned))
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -50,8 +57,9 @@ def extract_json(text: str) -> dict[str, Any]:
|
||||||
fences = re.findall(fence_pattern, cleaned, re.DOTALL)
|
fences = re.findall(fence_pattern, cleaned, re.DOTALL)
|
||||||
for block in fences:
|
for block in fences:
|
||||||
try:
|
try:
|
||||||
return json.loads(block.strip())
|
return _ensure_dict(json.loads(block.strip()))
|
||||||
except json.JSONDecodeError:
|
except (json.JSONDecodeError, ValueError):
|
||||||
|
# JSONDecodeError = bad JSON; ValueError = parsed but not a dict
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 4. Find first '{' to last '}'
|
# 4. Find first '{' to last '}'
|
||||||
|
|
@ -59,8 +67,9 @@ def extract_json(text: str) -> dict[str, Any]:
|
||||||
last_brace = cleaned.rfind("}")
|
last_brace = cleaned.rfind("}")
|
||||||
if first_brace != -1 and last_brace > first_brace:
|
if first_brace != -1 and last_brace > first_brace:
|
||||||
try:
|
try:
|
||||||
return json.loads(cleaned[first_brace : last_brace + 1])
|
return _ensure_dict(json.loads(cleaned[first_brace : last_brace + 1]))
|
||||||
except json.JSONDecodeError:
|
except (json.JSONDecodeError, ValueError):
|
||||||
|
# JSONDecodeError = bad JSON; ValueError = parsed but not a dict
|
||||||
pass
|
pass
|
||||||
|
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
|
|
|
||||||
|
|
@ -256,4 +256,5 @@ def route_to_vendor(method: str, *args, **kwargs):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
error_msg = f"All vendors failed for '{method}' (tried: {', '.join(tried)})"
|
error_msg = f"All vendors failed for '{method}' (tried: {', '.join(tried)})"
|
||||||
raise RuntimeError(error_msg) from last_error
|
raise RuntimeError(error_msg) from last_error
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue