"""Utilities for robustly parsing JSON from LLM responses.""" import json import re def extract_json_object(content: str) -> dict: """Extract and parse the first JSON object from raw LLM output.""" text = (content or "").strip() if not text: raise json.JSONDecodeError("Empty LLM response", text, 0) if text.startswith("```"): text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE) text = re.sub(r"\s*```$", "", text) text = text.strip() if not text: raise json.JSONDecodeError("Empty LLM response after cleanup", text, 0) decoder = json.JSONDecoder() # Direct parse for pure JSON responses. try: parsed = json.loads(text) if isinstance(parsed, dict): return parsed except json.JSONDecodeError: pass # Try to decode from each object start. This handles wrapper text more # reliably than regex, especially with nested braces. for idx, ch in enumerate(text): if ch != "{": continue try: parsed, _ = decoder.raw_decode(text[idx:]) if isinstance(parsed, dict): return parsed except json.JSONDecodeError: continue raise json.JSONDecodeError("No valid top-level JSON object found", text, 0)