perf: pre-compile regex patterns in extract_json util
Compile THINK_PATTERN and FENCE_PATTERN at the module level to improve the performance of extract_json by avoiding redundant regex compilation during function calls. Measured a ~5% performance improvement in an isolated benchmark. - Baseline: 51.98 us/call - Optimized: 49.26 us/call Co-authored-by: aguzererler <6199053+aguzererler@users.noreply.github.com>
This commit is contained in:
parent
3e9322ae4b
commit
646fe40754
|
|
@ -6,6 +6,10 @@ import json
|
||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
# Pre-compiled regex patterns for better performance
|
||||||
|
THINK_PATTERN = re.compile(r"<think>.*?</think>", re.DOTALL)
|
||||||
|
FENCE_PATTERN = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
def extract_json(text: str) -> dict[str, Any]:
|
def extract_json(text: str) -> dict[str, Any]:
|
||||||
"""Extract a JSON object from LLM output that may contain markdown fences,
|
"""Extract a JSON object from LLM output that may contain markdown fences,
|
||||||
|
|
@ -44,7 +48,7 @@ def extract_json(text: str) -> dict[str, Any]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# 2. Strip <think>...</think> blocks (DeepSeek R1)
|
# 2. Strip <think>...</think> blocks (DeepSeek R1)
|
||||||
cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
|
cleaned = THINK_PATTERN.sub("", text).strip()
|
||||||
|
|
||||||
# Try again after stripping think blocks
|
# Try again after stripping think blocks
|
||||||
try:
|
try:
|
||||||
|
|
@ -53,8 +57,7 @@ def extract_json(text: str) -> dict[str, Any]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# 3. Extract from markdown code fences
|
# 3. Extract from markdown code fences
|
||||||
fence_pattern = r"```(?:json)?\s*\n?(.*?)\n?\s*```"
|
fences = FENCE_PATTERN.findall(cleaned)
|
||||||
fences = re.findall(fence_pattern, cleaned, re.DOTALL)
|
|
||||||
for block in fences:
|
for block in fences:
|
||||||
try:
|
try:
|
||||||
return _ensure_dict(json.loads(block.strip()))
|
return _ensure_dict(json.loads(block.strip()))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue