Skip to content

json_schema_utils

Utilities for handling JSON schema compatibility.

preprocess_schema_for_union_types(schema, ensure_ascii=True)

Preprocess a JSON schema to handle union types (array type specifications).

This is a temporary workaround for a limitation in outlines-core 0.1.26, which doesn't support JSON schema type arrays such as ["string", "null"]. This function converts such arrays into the equivalent anyOf form.

Parameters:

Name Type Description Default
schema Union[str, dict]

The JSON schema as a string or dictionary

required
ensure_ascii bool

Whether to ensure the output JSON is ASCII-only

True

Returns:

Type Description
str

The preprocessed JSON schema string

Examples:

>>> schema = {"type": ["string", "null"]}
>>> preprocess_schema_for_union_types(schema)
'{"anyOf":[{"type":"string"},{"type":"null"}]}'
Source code in outlines/types/json_schema_utils.py
def preprocess_schema_for_union_types(
    schema: Union[str, dict], ensure_ascii: bool = True
) -> str:
    """
    Rewrite union-type arrays in a JSON schema into ``anyOf`` form.

    Temporary workaround for outlines-core 0.1.26, which does not support
    JSON schema type arrays such as ["string", "null"]. Such arrays are
    converted into the equivalent anyOf format.

    Parameters
    ----------
    schema
        The JSON schema, either as a JSON string or as a dictionary
    ensure_ascii
        Whether the serialized output JSON is restricted to ASCII

    Returns
    -------
    str
        The preprocessed JSON schema string. A string input that cannot
        be parsed as JSON, or that needs no conversion, is returned
        exactly as it was passed in.

    Examples
    --------
    >>> schema = {"type": ["string", "null"]}
    >>> preprocess_schema_for_union_types(schema)
    '{"anyOf":[{"type":"string"},{"type":"null"}]}'
    """
    came_as_text = isinstance(schema, str)

    if came_as_text:
        try:
            parsed = json.loads(schema)
        except (json.JSONDecodeError, ValueError):
            # Unparseable input is handed back untouched so that whoever
            # consumes it next reports the error the way it always did.
            return schema
    else:
        parsed = schema

    converted = _convert_type_arrays_to_anyof(parsed)

    # A string input that needed no conversion is returned verbatim rather
    # than re-serialized, so its original formatting is kept.
    if converted == parsed and came_as_text:  # pragma: no cover
        return schema

    # Compact separators: no whitespace after "," or ":" in the output.
    return json.dumps(
        converted,
        ensure_ascii=ensure_ascii,
        separators=(",", ":"),
    )