"""Tests for robust JSON extraction from LLM output.""" import pytest from tradingagents.agents.utils.json_utils import extract_json # ─── Happy-path tests ───────────────────────────────────────────────────────── def test_pure_json(): assert extract_json('{"key": "value"}') == {"key": "value"} def test_json_with_whitespace(): assert extract_json(' \n{"key": "value"}\n ') == {"key": "value"} def test_markdown_fence_json(): text = '```json\n{"key": "value"}\n```' assert extract_json(text) == {"key": "value"} def test_markdown_fence_no_lang(): text = '```\n{"key": "value"}\n```' assert extract_json(text) == {"key": "value"} def test_think_preamble_only(): text = 'I need to analyze the macro environment carefully.\n{"key": "value"}' assert extract_json(text) == {"key": "value"} def test_think_plus_fence(): text = 'Some reasoning here.\n```json\n{"key": "value"}\n```' assert extract_json(text) == {"key": "value"} def test_prose_with_json(): text = 'Here is the result:\n{"key": "value"}\nDone.' assert extract_json(text) == {"key": "value"} def test_nested_json(): data = { "timeframe": "1 month", "executive_summary": "Strong growth momentum", "macro_context": { "economic_cycle": "expansion", "central_bank_stance": "hawkish", "geopolitical_risks": ["trade tensions", "energy prices"], }, "key_themes": [ {"theme": "AI Infrastructure", "description": "Data center boom", "conviction": "high", "timeframe": "3-6 months"} ], "stocks_to_investigate": [ { "ticker": "NVDA", "name": "NVIDIA Corp", "sector": "Technology", "rationale": "GPU demand for AI training", "thesis_angle": "growth", "conviction": "high", "key_catalysts": ["H100 demand", "Blackwell launch"], "risks": ["Supply constraints", "Competition"], } ], "risk_factors": ["Fed rate hikes", "China tensions"], } import json text = json.dumps(data) result = extract_json(text) assert result["timeframe"] == "1 month" assert result["stocks_to_investigate"][0]["ticker"] == "NVDA" def test_deepseek_r1_realistic(): """Simulate a real DeepSeek R1 response with think block and JSON fence.""" text = ( "\n" "Let me analyze the macro environment. The geopolitical scanner shows tension...\n" "I need to identify the top 8-10 stocks.\n" "\n" "```json\n" '{"timeframe": "1 month", "executive_summary": "Bullish macro backdrop", ' '"macro_context": {"economic_cycle": "expansion", "central_bank_stance": "neutral", "geopolitical_risks": []}, ' '"key_themes": [], "stocks_to_investigate": [{"ticker": "AAPL", "name": "Apple", "sector": "Technology", ' '"rationale": "Strong cash flows", "thesis_angle": "value", "conviction": "high", ' '"key_catalysts": ["Services growth"], "risks": ["China sales"]}], "risk_factors": []}\n' "```" ) result = extract_json(text) assert result["timeframe"] == "1 month" assert result["stocks_to_investigate"][0]["ticker"] == "AAPL" def test_preamble_and_postamble(): """JSON buried in prose before and after.""" text = 'Based on my analysis of the market data:\n\n{"result": 42}\n\nThis concludes my analysis.' assert extract_json(text) == {"result": 42} # ─── Error cases ────────────────────────────────────────────────────────────── def test_empty_input(): with pytest.raises(ValueError, match="Empty input"): extract_json("") def test_whitespace_only(): with pytest.raises(ValueError, match="Empty input"): extract_json(" \n\t ") def test_malformed_json_no_fallback(): with pytest.raises(ValueError): extract_json('{"key": value_without_quotes}') def test_no_json_at_all(): with pytest.raises(ValueError): extract_json("Just some text with no JSON structure at all") def test_array_input_raises_value_error(): """extract_json rejects JSON arrays — only dicts are accepted. All callers (macro_synthesis, macro_bridge, CLI) call .get() on the result, so returning a list would cause AttributeError downstream. The function enforces dict-only return at runtime. """ with pytest.raises(ValueError, match="Expected a JSON object"): extract_json('[1, 2, 3]')