mirror of
https://github.com/arkorty/B.Tech-Project-III.git
synced 2026-04-19 12:41:48 +00:00
44 lines
1.3 KiB
Python
44 lines
1.3 KiB
Python
"""Utilities for robustly parsing JSON from LLM responses."""
|
|
|
|
import json
|
|
import re
|
|
|
|
|
|
def extract_json_object(content: str) -> dict:
    """Extract and parse the first JSON object from raw LLM output.

    The input is stripped of surrounding whitespace. If it starts with a
    Markdown code fence (three backticks, optionally tagged ``json`` in any
    letter case), the opening fence and a trailing closing fence are removed.
    The remaining text is first parsed as a whole; when that fails, or yields
    something other than a JSON object, every ``{`` is tried in order as the
    start of an object and the first one that decodes is returned.

    Args:
        content: Raw model output. ``None`` is treated like an empty string.

    Returns:
        The first JSON object that could be decoded, as a ``dict``. When the
        whole text is valid JSON but not an object (for example a list), this
        is the first object embedded in it.

    Raises:
        json.JSONDecodeError: If the input is empty, is empty once the code
            fences are removed, or contains no decodable JSON object.
    """
    text = (content or "").strip()
    if not text:
        raise json.JSONDecodeError("Empty LLM response", text, 0)

    if text.startswith("```"):
        # Drop the opening fence (with an optional "json" tag) and, if the
        # text ends with one, the closing fence.
        text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
        text = re.sub(r"\s*```$", "", text)

    text = text.strip()
    if not text:
        raise json.JSONDecodeError("Empty LLM response after cleanup", text, 0)

    # Direct parse for pure JSON responses.
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # Try to decode from each object start. This handles wrapper text more
    # reliably than regex, especially with nested braces. Passing the offset
    # to raw_decode avoids copying the tail of the text for every candidate.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)

    raise json.JSONDecodeError("No valid top-level JSON object found", text, 0)
|