Files
2026-04-05 00:43:23 +05:30

44 lines
1.3 KiB
Python

"""Utilities for robustly parsing JSON from LLM responses."""
import json
import re
def extract_json_object(content: str) -> dict:
    """Extract and parse the first JSON object from raw LLM output.

    The reply is stripped of surrounding whitespace. If it opens with a
    Markdown code fence, the opening fence (optionally tagged ``json``, in
    any letter case) and a closing fence at the very end are removed. The
    remaining text is then parsed in two stages:

    1. The whole text is parsed as JSON; a dict result is returned as is.
    2. Otherwise every ``{`` is tried, left to right, as the start of a
       JSON object, and the first one that decodes to a dict is returned.
       Text before and after that object is ignored.

    Args:
        content: Raw model output. Falsy values (``None``, ``""``) are
            treated as an empty reply.

    Returns:
        The first dict that could be decoded from the reply.

    Raises:
        json.JSONDecodeError: If the reply is empty (before or after fence
            removal) or no ``{`` position decodes to a JSON object.
    """
    text = (content or "").strip()
    if not text:
        raise json.JSONDecodeError("Empty LLM response", text, 0)

    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
        text = re.sub(r"\s*```$", "", text)
        text = text.strip()
        if not text:
            raise json.JSONDecodeError("Empty LLM response after cleanup", text, 0)

    # Direct parse for pure JSON responses.
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # Try to decode from each object start. This handles wrapper text more
    # reliably than regex, especially with nested braces. ``str.find`` jumps
    # straight to the next brace, and handing the offset to ``raw_decode``
    # avoids copying the tail of the text for every candidate.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)

    raise json.JSONDecodeError("No valid top-level JSON object found", text, 0)