Skip to content

types

Output types for structured generation and regex DSL.

airports

Generate valid airport codes.

countries

Generate valid country codes and names.

get_country_flags()

Generate Unicode flags for all ISO 3166-1 alpha-2 country codes in Alpha2 Enum.

Source code in outlines/types/countries.py
def get_country_flags():
    """Generate Unicode flags for all ISO 3166-1 alpha-2 country codes in Alpha2 Enum."""
    # Regional-indicator symbols start at U+1F1E6 ("🇦"); a flag emoji is the
    # pair of indicators corresponding to the two letters of the alpha-2 code.
    offset = ord("🇦") - ord("A")
    flags = {}
    for code in Alpha2:
        first, second = code.name[0], code.name[1]
        flags[code.name] = chr(ord(first) + offset) + chr(ord(second) + offset)
    return flags

dsl

Regular expression DSL and output types for structured generation.

This module contains elements related to three logical steps in the use of output types for structured generation:

  1. Definition of Term classes that contain output type definitions. That includes both terms intended to be used by themselves such as JsonSchema or CFG and terms that are part of the regular expression DSL such as Alternatives or KleeneStar (and the related functions).
  2. Conversion of Python types into Term instances (python_types_to_terms).
  3. Conversion of a Term instance into a regular expression (to_regex).

CFG dataclass

Bases: Term

Class representing a context-free grammar.

Parameters:

Name Type Description Default
definition str

The definition of the context-free grammar as a string.

required
Source code in outlines/types/dsl.py
@dataclass
class CFG(Term):
    """Class representing a context-free grammar.

    Parameters
    ----------
    definition
        The definition of the context-free grammar as a string.

    """
    definition: str

    def _display_node(self) -> str:
        # Compact one-line label used by the ASCII tree display.
        return f"CFG('{self.definition}')"

    def __repr__(self):
        return f"CFG(definition='{self.definition}')"

    def __eq__(self, other):
        # Two CFGs are equal iff their grammar definitions match exactly.
        return isinstance(other, CFG) and self.definition == other.definition

    @classmethod
    def from_file(cls, path: str) -> "CFG":
        """Create a CFG instance from a file containing a CFG definition.

        Parameters
        ----------
        path : str
            The path to the file containing the CFG definition.

        Returns
        -------
        CFG
            A CFG instance.

        """
        with open(path, "r") as grammar_file:
            contents = grammar_file.read()
        return cls(contents)

from_file(path) classmethod

Create a CFG instance from a file containing a CFG definition.

Parameters:

Name Type Description Default
path str

The path to the file containing the CFG definition.

required

Returns:

Type Description
CFG

A CFG instance.

Source code in outlines/types/dsl.py
@classmethod
def from_file(cls, path: str) -> "CFG":
    """Create a CFG instance from a file containing a CFG definition.

    Parameters
    ----------
    path : str
        The path to the file containing the CFG definition.

    Returns
    -------
    CFG
        A CFG instance.

    """
    with open(path, "r") as grammar_file:
        contents = grammar_file.read()
    return cls(contents)

FSM dataclass

Bases: Term

Class representing a finite state machine.

Parameters:

Name Type Description Default
fsm FSM

The finite state machine to store. This object must be an instance of interegular.fsm.FSM.

required
Source code in outlines/types/dsl.py
@dataclass
class FSM(Term):
    """Class representing a finite state machine.

    Parameters
    ----------
    fsm
        The finite state machine to store. This object must be an instance of
        `interegular.fsm.FSM`.

    """
    fsm: interegular.fsm.FSM

    def _display_node(self) -> str:
        # Use the idiomatic !r conversion instead of calling __repr__
        # directly; it looks the repr up on the type, as repr() does.
        return f"FSM({self.fsm!r})"

    def __repr__(self):
        return f"FSM(fsm={self.fsm!r})"

JsonSchema

Bases: Term

Class representing a JSON schema.

The JSON schema object from which to instantiate the class can be a dictionary, a string, a Pydantic model, a typed dict, a dataclass, or a genSON schema builder.

Source code in outlines/types/dsl.py
class JsonSchema(Term):
    """Class representing a JSON schema.

    The JSON schema object from which to instantiate the class can be a
    dictionary, a string, a Pydantic model, a typed dict, a dataclass, or a
    genSON schema builder.

    """
    def __init__(
        self,
        schema: Union[
            dict, str, type[BaseModel], _TypedDictMeta, type, SchemaBuilder
        ],
        whitespace_pattern: OptionalType[str] = None,
    ):
        """
        Parameters
        ----------
        schema
            The object containing the JSON schema.
        whitespace_pattern
            The pattern to use to match whitespace characters.

        """
        # Normalize every accepted input kind to a JSON string, checking in
        # order: dict, str, Pydantic model, TypedDict/dataclass, genSON
        # builder.
        if is_dict_instance(schema):
            normalized = json.dumps(schema)
        elif is_str_instance(schema):
            normalized = str(schema)
        elif is_pydantic_model(schema):
            normalized = json.dumps(schema.model_json_schema())  # type: ignore
        elif is_typed_dict(schema) or is_dataclass(schema):
            # TypeAdapter derives a JSON schema for both kinds.
            normalized = json.dumps(TypeAdapter(schema).json_schema())
        elif is_genson_schema_builder(schema):
            normalized = schema.to_json()  # type: ignore
        else:
            raise ValueError(
                f"Cannot parse schema {schema}. The schema must be either "
                "a Pydantic class, typed dict, a dataclass, a genSON schema "
                "builder or a string or dict that contains the JSON schema "
                "specification"
            )

        self.schema = normalized
        self.whitespace_pattern = whitespace_pattern

    def __post_init__(self):
        # NOTE(review): JsonSchema is a plain class with an explicit
        # __init__, not a dataclass, so Python never calls __post_init__
        # automatically; this validation only runs if invoked explicitly.
        jsonschema.Draft7Validator.check_schema(json.loads(self.schema))

    def _display_node(self) -> str:
        # Compact one-line label used by the ASCII tree display.
        return f"JsonSchema('{self.schema}')"

    def __repr__(self):
        return f"JsonSchema(schema='{self.schema}')"

    def __eq__(self, other):
        if not isinstance(other, JsonSchema):
            return False
        # Compare parsed documents so key order and formatting differences
        # do not break equality; fall back to raw string comparison.
        try:
            return json.loads(self.schema) == json.loads(other.schema)
        except json.JSONDecodeError:  # pragma: no cover
            return self.schema == other.schema

    @classmethod
    def from_file(cls, path: str) -> "JsonSchema":
        """Create a JsonSchema instance from a .json file containing a JSON
        schema.

        Parameters
        ----------
        path:
            The path to the file containing the JSON schema.

        Returns
        -------
        JsonSchema
            A JsonSchema instance.

        """
        with open(path, "r") as schema_file:
            parsed = json.load(schema_file)
        return cls(parsed)

__init__(schema, whitespace_pattern=None)

Parameters:

Name Type Description Default
schema Union[dict, str, type[BaseModel], _TypedDictMeta, type, SchemaBuilder]

The object containing the JSON schema.

required
whitespace_pattern Optional[str]

The pattern to use to match whitespace characters.

None
Source code in outlines/types/dsl.py
def __init__(
    self,
    schema: Union[
        dict, str, type[BaseModel], _TypedDictMeta, type, SchemaBuilder
    ],
    whitespace_pattern: OptionalType[str] = None,
):
    """
    Parameters
    ----------
    schema
        The object containing the JSON schema.
    whitespace_pattern
        The pattern to use to match whitespace characters.

    """
    # Normalize every accepted input kind to a JSON string, checking in
    # order: dict, str, Pydantic model, TypedDict/dataclass, genSON builder.
    if is_dict_instance(schema):
        normalized = json.dumps(schema)
    elif is_str_instance(schema):
        normalized = str(schema)
    elif is_pydantic_model(schema):
        normalized = json.dumps(schema.model_json_schema())  # type: ignore
    elif is_typed_dict(schema) or is_dataclass(schema):
        # TypeAdapter derives a JSON schema for both kinds.
        normalized = json.dumps(TypeAdapter(schema).json_schema())
    elif is_genson_schema_builder(schema):
        normalized = schema.to_json()  # type: ignore
    else:
        raise ValueError(
            f"Cannot parse schema {schema}. The schema must be either "
            "a Pydantic class, typed dict, a dataclass, a genSON schema "
            "builder or a string or dict that contains the JSON schema "
            "specification"
        )

    self.schema = normalized
    self.whitespace_pattern = whitespace_pattern

from_file(path) classmethod

Create a JsonSchema instance from a .json file containing a JSON schema.

Parameters:

Name Type Description Default
path str

The path to the file containing the JSON schema.

required

Returns:

Type Description
JsonSchema

A JsonSchema instance.

Source code in outlines/types/dsl.py
@classmethod
def from_file(cls, path: str) -> "JsonSchema":
    """Create a JsonSchema instance from a .json file containing a JSON
    schema.

    Parameters
    ----------
    path:
        The path to the file containing the JSON schema.

    Returns
    -------
    JsonSchema
        A JsonSchema instance.

    """
    with open(path, "r") as schema_file:
        parsed = json.load(schema_file)
    return cls(parsed)

Regex dataclass

Bases: Term

Class representing a regular expression.

Parameters:

Name Type Description Default
pattern str

The regular expression as a string.

required
Source code in outlines/types/dsl.py
@dataclass
class Regex(Term):
    """Class representing a regular expression.

    Parameters
    ----------
    pattern
        The regular expression as a string.

    """
    # The raw regex pattern; `to_regex` wraps it in a group "(...)" verbatim.
    pattern: str

    def _display_node(self) -> str:
        # Compact one-line label used by Term.display_ascii_tree.
        return f"Regex('{self.pattern}')"

    def __repr__(self):
        return f"Regex(pattern='{self.pattern}')"

Term

Represents types defined with a regular expression.

Regex instances can be used as a type in a Pydantic model definition. They will be translated to JSON Schema as a "string" field with the "pattern" keyword set to the regular expression this class represents. The class also handles validation.

Examples:

>>> from outlines.types import Regex
>>> from pydantic import BaseModel
>>>
>>> age_type = Regex("[0-9]+")
>>>
>>> class User(BaseModel):
>>>     name: str
>>>     age: age_type
Source code in outlines/types/dsl.py
class Term:
    """Represents types defined with a regular expression.

    `Regex` instances can be used as a type in a Pydantic model definition.
    They will be translated to JSON Schema as a "string" field with the
    "pattern" keyword set to the regular expression this class represents. The
    class also handles validation.

    Examples
    --------

    >>> from outlines.types import Regex
    >>> from pydantic import BaseModel
    >>>
    >>> age_type = Regex("[0-9]+")
    >>>
    >>> class User(BaseModel):
    >>>     name: str
    >>>     age: age_type

    """

    def __add__(self: "Term", other: "Term") -> "Sequence":
        # Plain strings are promoted to String terms so `term + "abc"` works.
        other = String(str(other)) if is_str_instance(other) else other
        return Sequence([self, other])

    def __radd__(self: "Term", other: "Term") -> "Sequence":
        other = String(str(other)) if is_str_instance(other) else other
        return Sequence([other, self])

    def __or__(self: "Term", other: "Term") -> "Alternatives":
        # `term | "abc"` builds an alternation, promoting strings as above.
        other = String(str(other)) if is_str_instance(other) else other
        return Alternatives([self, other])

    def __ror__(self: "Term", other: "Term") -> "Alternatives":
        other = String(str(other)) if is_str_instance(other) else other
        return Alternatives([other, self])

    def __get_validator__(self, _core_schema):
        # Returns a plain callable that delegates to `validate`.
        def validate(input_value):
            return self.validate(input_value)

        return validate

    def __get_pydantic_core_schema__(
        self, source_type: Any, handler: GetCoreSchemaHandler
    ) -> cs.CoreSchema:
        # Pydantic v2 hook: validate field values with this term's regex.
        return cs.no_info_plain_validator_function(lambda value: self.validate(value))

    def __get_pydantic_json_schema__(
        self, core_schema: cs.CoreSchema, handler: GetJsonSchemaHandler
    ) -> JsonSchemaValue:
        # Terms serialize to a JSON Schema string constrained by a pattern.
        return {"type": "string", "pattern": to_regex(self)}

    def validate(self, value: str) -> str:
        """Return `value` unchanged if it matches this term's language.

        Raises
        ------
        ValueError
            If `value` is not in the language of the underlying regex.

        """
        pattern = to_regex(self)
        if re.fullmatch(pattern, str(value)) is None:
            raise ValueError(
                f"Input should be in the language of the regular expression {pattern}"
            )
        return value

    def matches(self, value: str) -> bool:
        """Check that a given value is in the language defined by the Term.

        We make the assumption that the language defined by the term can
        be defined with a regular expression.

        """
        return re.fullmatch(to_regex(self), str(value)) is not None

    def display_ascii_tree(self, indent="", is_last=True) -> str:
        """Display the regex tree in ASCII format."""
        connector = "└── " if is_last else "├── "
        # Children indent past this node; the vertical bar continues only
        # when there are further siblings below.
        child_indent = indent + ("    " if is_last else "│   ")
        return (
            indent
            + connector
            + self._display_node()
            + "\n"
            + self._display_children(child_indent)
        )

    def _display_node(self):
        # Subclasses must provide their one-line tree label.
        raise NotImplementedError

    def _display_children(self, indent: str) -> str:
        """Display the children of this node. Override in subclasses with children."""
        return ""

    def __str__(self):
        return self.display_ascii_tree()

    # Fluent builders delegating to the module-level factory functions.
    def optional(self) -> "Optional":
        return optional(self)

    def exactly(self, count: int) -> "QuantifyExact":
        return exactly(count, self)

    def at_least(self, count: int) -> "QuantifyMinimum":
        return at_least(count, self)

    def at_most(self, count: int) -> "QuantifyMaximum":
        return at_most(count, self)

    def between(self, min_count: int, max_count: int) -> "QuantifyBetween":
        return between(min_count, max_count, self)

    def one_or_more(self) -> "KleenePlus":
        return one_or_more(self)

    def zero_or_more(self) -> "KleeneStar":
        return zero_or_more(self)

    # deprecated
    def times(self, count: int) -> "QuantifyExact":
        return times(self, count)

    # deprecated
    def repeat(self, min_count: int, max_count: int) -> Union[
        "QuantifyMinimum", "QuantifyMaximum", "QuantifyBetween"
    ]:
        return repeat(self, min_count, max_count)

display_ascii_tree(indent='', is_last=True)

Display the regex tree in ASCII format.

Source code in outlines/types/dsl.py
def display_ascii_tree(self, indent="", is_last=True) -> str:
    """Display the regex tree in ASCII format."""
    connector = "└── " if is_last else "├── "
    # Children indent past this node; the vertical bar continues only when
    # there are further siblings below.
    child_indent = indent + ("    " if is_last else "│   ")
    return (
        indent
        + connector
        + self._display_node()
        + "\n"
        + self._display_children(child_indent)
    )

matches(value)

Check that a given value is in the language defined by the Term.

We make the assumption that the language defined by the term can be defined with a regular expression.

Source code in outlines/types/dsl.py
def matches(self, value: str) -> bool:
    """Check that a given value is in the language defined by the Term.

    We make the assumption that the language defined by the term can
    be defined with a regular expression.

    """
    return re.fullmatch(to_regex(self), str(value)) is not None

at_least(count, term)

Repeat the term at least count times.

Source code in outlines/types/dsl.py
def at_least(count: int, term: Union[Term, str]) -> QuantifyMinimum:
    """Repeat the term at least `count` times."""
    # Promote plain strings to String terms before quantifying.
    if isinstance(term, str):
        term = String(term)
    return QuantifyMinimum(term, count)

at_most(count, term)

Repeat the term at most count times.

Source code in outlines/types/dsl.py
def at_most(count: int, term: Union[Term, str]) -> QuantifyMaximum:
    """Repeat the term at most `count` times."""
    term = String(term) if isinstance(term, str) else term
    return QuantifyMaximum(term, count)

either(*terms)

Represents an alternative between different terms or strings.

This factory function automatically translates string arguments into String objects.

Source code in outlines/types/dsl.py
def either(*terms: Union[str, Term]):
    """Represents an alternative between different terms or strings.

    This factory function automatically translates string arguments
    into `String` objects.

    """
    # Promote plain strings; any other argument is passed through unchanged.
    coerced = [String(item) if isinstance(item, str) else item for item in terms]
    return Alternatives(coerced)

exactly(count, term)

Repeat the term exactly count times.

Source code in outlines/types/dsl.py
def exactly(count: int, term: Union[Term, str]) -> QuantifyExact:
    """Repeat the term exactly `count` times."""
    # Promote plain strings to String terms before quantifying.
    if isinstance(term, str):
        term = String(term)
    return QuantifyExact(term, count)

python_types_to_terms(ptype, recursion_depth=0)

Convert Python types to Outlines DSL terms that constrain LLM output.

Parameters:

Name Type Description Default
ptype Any

The Python type to convert

required
recursion_depth int

Current recursion depth to prevent infinite recursion

0

Returns:

Type Description
Term

The corresponding DSL Term instance.

Source code in outlines/types/dsl.py
def python_types_to_terms(ptype: Any, recursion_depth: int = 0) -> Term:
    """Convert Python types to Outlines DSL terms that constrain LLM output.

    Parameters
    ----------
    ptype
        The Python type to convert
    recursion_depth
        Current recursion depth to prevent infinite recursion

    Returns
    -------
    Term
        The corresponding DSL `Term` instance.

    Raises
    ------
    RecursionError
        If the conversion recurses more than 10 levels deep.
    TypeError
        If `ptype` matches none of the supported kinds.

    """
    # Guard against recursive type definitions (e.g. a type referring to
    # itself through a Union or container).
    if recursion_depth > 10:
        raise RecursionError(
            f"Maximum recursion depth exceeded when converting {ptype}. "
            "This might be due to a recursive type definition."
        )

    # First handle Term instances
    if isinstance(ptype, Term):
        return ptype

    # Basic types
    # NOTE(review): the order of these checks is presumably significant
    # (bool is a subclass of int in Python) — confirm against the is_*
    # helpers' definitions before reordering.
    if is_int(ptype):
        return types.integer
    elif is_float(ptype):
        return types.number
    elif is_bool(ptype):
        return types.boolean
    elif is_str(ptype):
        return types.string
    elif is_native_dict(ptype):
        # A bare `dict` maps to the JSON context-free grammar rather than a
        # regex-based term.
        return CFG(grammars.json)
    elif is_time(ptype):
        return types.time
    elif is_date(ptype):
        return types.date
    elif is_datetime(ptype):
        return types.datetime

    # Basic type instances
    if is_str_instance(ptype):
        # A literal string value becomes a String term matching it verbatim.
        return String(ptype)
    elif is_int_instance(ptype) or is_float_instance(ptype):
        # A literal number becomes a regex matching its string form.
        return Regex(str(ptype))

    # Structured types
    structured_type_checks = [
        lambda x: is_dataclass(x),
        lambda x: is_typed_dict(x),
        lambda x: is_pydantic_model(x),
    ]
    if any(check(ptype) for check in structured_type_checks):
        # TypeAdapter derives a JSON schema for dataclasses, TypedDicts and
        # Pydantic models alike.
        schema = TypeAdapter(ptype).json_schema()
        return JsonSchema(schema)

    elif is_genson_schema_builder(ptype):
        schema = ptype.to_json()
        return JsonSchema(schema)

    if is_enum(ptype):
        # An enum becomes an alternation over its converted member values.
        return Alternatives(
            [
                python_types_to_terms(member, recursion_depth + 1)
                for member in _get_enum_members(ptype)
            ]
        )

    # Generic typing constructs: dispatch on the origin with its type args.
    args = get_args(ptype)
    if is_literal(ptype):
        return _handle_literal(args)
    elif is_union(ptype):
        return _handle_union(args, recursion_depth)
    elif is_typing_list(ptype):
        return _handle_list(args, recursion_depth)
    elif is_typing_tuple(ptype):
        return _handle_tuple(args, recursion_depth)
    elif is_typing_dict(ptype):
        return _handle_dict(args, recursion_depth)

    if is_callable(ptype):
        # A callable is described by the JSON schema of its signature.
        return JsonSchema(get_schema_from_signature(ptype))

    type_name = getattr(ptype, "__name__", ptype)
    raise TypeError(
        f"Type {type_name} is currently not supported. Please open an issue: "
        "https://github.com/dottxt-ai/outlines/issues"
    )

to_regex(term)

Convert a term to a regular expression.

We only consider self-contained terms that do not refer to another rule.

Parameters:

Name Type Description Default
term Term

The term to convert to a regular expression.

required

Returns:

Type Description
str

The regular expression as a string.

Source code in outlines/types/dsl.py
def to_regex(term: Term) -> str:
    """Convert a term to a regular expression.

    We only consider self-contained terms that do not refer to another rule.

    Parameters
    ----------
    term
        The term to convert to a regular expression.

    Returns
    -------
    str
        The regular expression as a string.

    Raises
    ------
    TypeError
        If the term has no regular-expression translation (no matching
        case below).

    """
    # Composite cases wrap sub-expressions in a group "(...)" so that
    # quantifiers bind to the whole sub-expression.
    match term:
        case String():
            # Escape the literal so regex metacharacters match verbatim.
            return re.escape(term.value)
        case Regex():
            return f"({term.pattern})"
        case JsonSchema():
            # Delegate schema-to-regex translation, honoring the term's
            # whitespace pattern.
            regex_str = build_regex_from_schema(term.schema, term.whitespace_pattern)
            return f"({regex_str})"
        case KleeneStar():
            return f"({to_regex(term.term)})*"
        case KleenePlus():
            return f"({to_regex(term.term)})+"
        case Optional():
            return f"({to_regex(term.term)})?"
        case Alternatives():
            regexes = [to_regex(subterm) for subterm in term.terms]
            return f"({'|'.join(regexes)})"
        case Sequence():
            regexes = [to_regex(subterm) for subterm in term.terms]
            return f"{''.join(regexes)}"
        case QuantifyExact():
            return f"({to_regex(term.term)}){{{term.count}}}"
        case QuantifyMinimum():
            return f"({to_regex(term.term)}){{{term.min_count},}}"
        case QuantifyMaximum():
            return f"({to_regex(term.term)}){{,{term.max_count}}}"
        case QuantifyBetween():
            return f"({to_regex(term.term)}){{{term.min_count},{term.max_count}}}"
        case _:
            raise TypeError(
                f"Cannot convert object {repr(term)} to a regular expression."
            )

locale

Locale-specific regex patterns.

us

Locale-specific regex patterns for the United States.

utils

Utility functions for the types module.

get_schema_from_signature(fn)

Turn a function signature into a JSON schema.

Every JSON object valid to the output JSON Schema can be passed to fn using the ** unpacking syntax.

Source code in outlines/types/utils.py
def get_schema_from_signature(fn: Callable) -> dict:
    """Turn a function signature into a JSON schema.

    Every JSON object valid to the output JSON Schema can be passed
    to `fn` using the ** unpacking syntax.

    Parameters
    ----------
    fn
        The callable whose signature is converted.

    Returns
    -------
    dict
        The JSON schema describing `fn`'s parameters.

    Raises
    ------
    ValueError
        If any parameter of `fn` lacks a type annotation.

    """
    signature = inspect.signature(fn)
    arguments = {}
    for name, arg in signature.parameters.items():
        # Compare against the public sentinel with `is`: the sentinel is a
        # singleton, and identity avoids invoking a custom __eq__ that an
        # annotation object might define (inspect._empty is private API).
        if arg.annotation is inspect.Parameter.empty:
            raise ValueError("Each argument must have a type annotation")
        arguments[name] = (arg.annotation, ...)

    try:
        fn_name = fn.__name__
    except Exception as e:
        # Some callables (e.g. partials) lack __name__; fall back to a
        # generic model name and surface the reason to the user.
        fn_name = "Arguments"
        warnings.warn(
            f"The function name could not be determined. Using default name 'Arguments' instead. For debugging, here is exact error:\n{e}",
            category=UserWarning,
        )
    model = create_model(fn_name, **arguments)

    return model.model_json_schema()