Merge pull request #38 from aguzererler/perf-optimize-json-extraction-regex-3266597981569245080

 Pre-compile regex in JSON extraction utility
This commit is contained in:
ahmet guzererler 2026-03-21 02:36:33 +01:00 committed by GitHub
commit 279081d3c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 6 additions and 3 deletions

View File

@@ -6,6 +6,10 @@ import json
import re
from typing import Any
# Regexes are compiled once at import time so extract_json can reuse the
# compiled objects instead of passing raw pattern strings to `re` per call.
# Matches one <think>...</think> block; DOTALL lets it span multiple lines.
THINK_PATTERN = re.compile(
    r"<think>.*?</think>",
    flags=re.DOTALL,
)
# Captures the body of a ``` code fence, with or without a "json" tag.
FENCE_PATTERN = re.compile(
    r"```(?:json)?\s*\n?(.*?)\n?\s*```",
    flags=re.DOTALL,
)
def extract_json(text: str) -> dict[str, Any]:
"""Extract a JSON object from LLM output that may contain markdown fences,
@@ -44,7 +48,7 @@ def extract_json(text: str) -> dict[str, Any]:
pass
# 2. Strip <think>...</think> blocks (DeepSeek R1)
cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
cleaned = THINK_PATTERN.sub("", text).strip()
# Try again after stripping think blocks
try:
@@ -53,8 +57,7 @@ def extract_json(text: str) -> dict[str, Any]:
pass
# 3. Extract from markdown code fences
fence_pattern = r"```(?:json)?\s*\n?(.*?)\n?\s*```"
fences = re.findall(fence_pattern, cleaned, re.DOTALL)
fences = FENCE_PATTERN.findall(cleaned)
for block in fences:
try:
return _ensure_dict(json.loads(block.strip()))