fix: aggressive JSON fallback for LLMs that don't support structured output

- Use SystemMessage forcing JSON-only responses (no markdown)
- Provide concrete example JSON from schema fields
- Better JSON extraction: handles nested braces, code blocks, prose wrappers
- Fixes nemotron/minimax/other Ollama models returning prose instead of JSON

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
dtarkent2-sys 2026-04-02 15:39:08 -04:00
parent 1d3f5e9c86
commit ac17d98974
1 changed file with 72 additions and 17 deletions

View File

@ -449,25 +449,84 @@ def should_hard_veto(
# LLM structured output helper
# ---------------------------------------------------------------------------
def invoke_structured(llm, model_cls, prompt: str, timeout: int = 60):
"""Call LLM with structured output, with JSON fallback.
def _build_json_example(model_cls) -> str:
"""Build a minimal JSON example from a Pydantic model's fields."""
examples = {}
schema = model_cls.model_json_schema()
props = schema.get("properties", {})
for field_name, field_info in props.items():
ftype = field_info.get("type", "string")
if ftype == "number" or ftype == "integer":
examples[field_name] = 5.0
elif ftype == "boolean":
examples[field_name] = True
elif ftype == "array":
examples[field_name] = ["example item"]
else:
examples[field_name] = "your analysis here"
return json.dumps(examples, indent=2)
Each LLM call is wrapped in a per-call timeout (default 60s) to avoid
hanging on a single call while the global 60-minute analysis timeout
covers the entire pipeline.
def _extract_json(text: str) -> str:
"""Extract JSON from LLM response that may contain markdown or prose."""
text = text.strip()
# Try direct parse first
if text.startswith("{"):
return text
# Extract from ```json blocks
if "```json" in text:
return text.split("```json")[1].split("```")[0].strip()
if "```" in text:
parts = text.split("```")
for part in parts[1::2]: # odd indices are inside code blocks
candidate = part.strip()
if candidate.startswith("{"):
return candidate
# Find first { ... } block
start = text.find("{")
if start >= 0:
depth = 0
for i in range(start, len(text)):
if text[i] == "{": depth += 1
elif text[i] == "}": depth -= 1
if depth == 0:
return text[start:i+1]
return text
def invoke_structured(llm, model_cls, prompt: str, timeout: int = 60):
"""Call LLM with structured output, with aggressive JSON fallback.
Strategy:
1. Try langchain structured output (works with OpenAI, Anthropic)
2. If that fails, use JSON-only system prompt with schema + example
3. Extract JSON from any markdown/prose wrapper
4. Fall back to defaults if all else fails
"""
import concurrent.futures
from langchain_core.messages import SystemMessage, HumanMessage
def _call_structured():
structured = llm.with_structured_output(model_cls)
return structured.invoke(prompt)
def _call_json_fallback():
def _call_json_direct():
"""Force JSON output with aggressive system prompt and concrete example."""
schema_str = json.dumps(model_cls.model_json_schema(), indent=2)
json_prompt = (
f"{prompt}\n\nReturn ONLY valid JSON matching this schema:\n{schema_str}"
)
return llm.invoke(json_prompt)
example_str = _build_json_example(model_cls)
messages = [
SystemMessage(content=(
"You are a JSON-only API. You MUST respond with a single valid JSON object. "
"No markdown, no commentary, no explanation, no ```json blocks. "
"Start your response with { and end with }. Nothing else."
)),
HumanMessage(content=(
f"{prompt}\n\n"
f"Respond with ONLY a JSON object matching this schema:\n{schema_str}\n\n"
f"Example format (fill in real values):\n{example_str}"
)),
]
return llm.invoke(messages)
# Try structured output with per-call timeout
try:
@ -480,20 +539,16 @@ def invoke_structured(llm, model_cls, prompt: str, timeout: int = 60):
except Exception as e:
logger.warning("Structured output failed for %s: %s — using JSON fallback", model_cls.__name__, e)
# JSON fallback with per-call timeout
# JSON-direct fallback with per-call timeout
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(_call_json_fallback)
future = pool.submit(_call_json_direct)
response = future.result(timeout=timeout)
except concurrent.futures.TimeoutError:
logger.warning("JSON fallback timed out after %ds for %s", timeout, model_cls.__name__)
raise TimeoutError(f"LLM JSON fallback timed out after {timeout}s for {model_cls.__name__}")
content = response.content.strip()
if "```json" in content:
content = content.split("```json")[1].split("```")[0].strip()
elif "```" in content:
content = content.split("```")[1].split("```")[0].strip()
content = _extract_json(response.content)
try:
return model_cls.model_validate_json(content)
except ValidationError as ve: