Skip to content

Json parser

clean_json_string(json_str)

Clean a JSON-like string so that it can be parsed by the built-in json module.

  1. Remove single-line (//...) and multi-line (/*...*/) comments outside of string literals.
  2. Convert single-quoted string literals to double-quoted ones, preserving internal apostrophes.
  3. Replace Python-specific boolean/null literals (True, False, None) with their JSON equivalents (true, false, null).
  4. Remove trailing commas in objects and arrays.

Parameters:

Name Type Description Default
json_str str

The raw JSON-like string to clean.

required

Returns:

Type Description
str

A cleaned JSON string suitable for json.loads.

Source code in dynamiq/utils/json_parser.py
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def clean_json_string(json_str: str) -> str:
    """
    Clean a JSON-like string so that it can be parsed by the built-in `json` module.

    1. Remove single-line (//...) and multi-line (/*...*/) comments outside of
       string literals.
    2. Convert single-quoted string literals to double-quoted ones, preserving
       internal apostrophes.
    3. Replace Python-specific boolean/null literals (True, False, None) with their
       JSON equivalents (true, false, null).
    4. Remove trailing commas in objects and arrays.

    Steps 2-4 are performed in a single left-to-right pass that consumes string
    literals as whole tokens, so the *contents* of strings are never rewritten:
    a value such as ``'None found, ]'`` keeps its text, and an apostrophe inside
    a double-quoted string is not mistaken for a string delimiter.

    Args:
        json_str: The raw JSON-like string to clean.

    Returns:
        A cleaned JSON string suitable for json.loads.
    """
    # 1. Remove comments
    json_str = _remove_comments_outside_strings(json_str)

    # One alternation, tried in order at each scan position. Because a string
    # literal is matched in full before anything else is considered, the literal
    # and trailing-comma alternatives can only ever fire outside of strings.
    token_pattern = re.compile(
        r'"(?:\\.|[^"\\])*"'  # double-quoted string: left untouched
        r"|'((?:\\'|[^'])*)'"  # single-quoted string: content is group 1
        r"|\b(?:True|False|None)\b"  # Python literal
        r"|,\s*(?=[\]}])",  # trailing comma (plus whitespace) before ] or }
        re.DOTALL,
    )
    python_to_json = {"True": "true", "False": "false", "None": "null"}

    def _rewrite(match: "re.Match[str]") -> str:
        token = match.group(0)
        if token.startswith('"'):
            # Already a JSON string; keep it byte-for-byte.
            return token
        if token.startswith("'"):
            # 2. Convert single-quoted string literals -> double-quoted
            return single_quoted_replacer(match)
        if token.startswith(","):
            # 4. Drop the trailing comma; the closing bracket itself is only
            #    looked ahead at, so it stays in place.
            return ""
        # 3. Replace Python-specific boolean/null with JSON equivalents
        return python_to_json[token]

    return token_pattern.sub(_rewrite, json_str)

extract_json_string(s)

Extract the first JSON object or array from the string by balancing brackets. The function looks for '{' or '[' and keeps track of nested brackets until they are balanced, returning the substring that contains the complete JSON.

Parameters:

Name Type Description Default
s str

The input string potentially containing a JSON object or array.

required

Returns:

Type Description
str | None

The extracted JSON string if found and balanced, otherwise None.

Source code in dynamiq/utils/json_parser.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def extract_json_string(s: str) -> str | None:
    """
    Extract the first JSON object or array from the string by balancing brackets.
    The function looks for '{' or '[' and keeps track of nested brackets until
    they are balanced, returning the substring that contains the complete JSON.

    Args:
        s: The input string potentially containing a JSON object or array.

    Returns:
        The extracted JSON string if found and balanced, otherwise None.
    """
    bracket_stack: list[str] = []
    start_index: int | None = None
    in_string = False
    escape = False

    for i, char in enumerate(s):
        # Toggle in_string when encountering an unescaped double quote
        if char == '"' and not escape:
            in_string = not in_string
        elif char == "\\" and not escape:
            escape = True
            continue

        if not in_string:
            if char in "{[":
                if not bracket_stack:
                    start_index = i
                bracket_stack.append(char)
            elif char in "}]":
                if bracket_stack:
                    opening_bracket = bracket_stack.pop()
                    if (opening_bracket == "{" and char != "}") or (opening_bracket == "[" and char != "]"):
                        # Mismatched brackets
                        return None
                    # If stack is empty, we've balanced everything
                    if not bracket_stack and start_index is not None:
                        return s[start_index : i + 1]
                else:
                    # Found a closing bracket without a matching opener
                    return None
        escape = False

    # If brackets never fully balanced, return None
    return None

parse_llm_json_output(response)

Attempt to parse the received LLM output into a JSON object or array. If direct parsing fails, looks for the first balanced JSON substring, then tries corrections.

Parameters:

Name Type Description Default
response str

The raw output from the LLM.

required

Returns:

Type Description
dict[str, Any] | list[Any]

A Python dict or list representing the parsed JSON.

Raises:

Type Description
ValueError

If the output cannot be parsed into valid JSON.

Source code in dynamiq/utils/json_parser.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def parse_llm_json_output(response: str) -> dict[str, Any] | list[Any]:
    """
    Attempt to parse the received LLM output into a JSON object or array.
    If direct parsing fails, looks for the first balanced JSON substring,
    then tries corrections.

    Args:
        response: The raw output from the LLM.

    Returns:
        A Python dict or list representing the parsed JSON.

    Raises:
        ValueError: If the output cannot be parsed into valid JSON.
    """
    # Try directly parsing
    try:
        return json.loads(response)
    except json.JSONDecodeError:
        pass

    # Attempt bracket extraction
    json_str = extract_json_string(response)
    if not json_str:
        raise ValueError(f"Response from LLM is not valid JSON: {response}")

    # Try parsing the extracted substring
    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        pass

    # Clean and try again
    cleaned_json_str = clean_json_string(json_str)
    try:
        return json.loads(cleaned_json_str)
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to parse JSON after corrections: {e}")

single_quoted_replacer(match)

A helper function for clean_json_string to replace single-quoted JSON-like string literals with double-quoted equivalents, preserving internal apostrophes.

Parameters:

Name Type Description Default
match Match[str]

The regular expression match object for a single-quoted string.

required

Returns:

Type Description
str

The corresponding double-quoted string literal.

Source code in dynamiq/utils/json_parser.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def single_quoted_replacer(match: Match[str]) -> str:
    """
    A helper function for clean_json_string to replace single-quoted JSON-like
    string literals with double-quoted equivalents, preserving internal
    apostrophes.

    The content is rewritten one escape sequence at a time rather than with
    blanket string replacement, so a double quote that is already escaped
    (``\\"``) is left alone instead of being escaped a second time, which
    would otherwise terminate the resulting JSON string early.

    Args:
        match: The regular expression match object for a single-quoted string.
            Group 1 must hold the text between the quotes.

    Returns:
        The corresponding double-quoted string literal.
    """
    content = match.group(1)

    def _convert(token_match: Match[str]) -> str:
        token = token_match.group(0)
        if token == '"':
            # Bare double quote: must be escaped inside a JSON string.
            return '\\"'
        if token == "\\'":
            # Escaped apostrophe: JSON has no \' escape, emit a plain one.
            return "'"
        # Any other backslash escape (\", \\, \n, ...) is kept as written.
        return token

    # "\\." is tried first so that a backslash always consumes the character
    # it escapes; only quotes that are not part of an escape reach the
    # second alternative.
    content = re.sub(r'\\.|"', _convert, content, flags=re.DOTALL)
    return f'"{content}"'