models

Module that contains all the models integrated in outlines.

We group the models into submodules by provider rather than by task (completion, chat completion, diffusers, etc.) and use routing functions everywhere else in the codebase.

anthropic

Integration with Anthropic's API.

Anthropic

Bases: Model

Thin wrapper around the anthropic.Anthropic client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the anthropic.Anthropic client.

Source code in outlines/models/anthropic.py
class Anthropic(Model):
    """Thin wrapper around the `anthropic.Anthropic` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `anthropic.Anthropic` client.

    """
    def __init__(
        self, client: "AnthropicClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            An `anthropic.Anthropic` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = AnthropicTypeAdapter()

    def generate(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        completion = self.client.messages.create(
            **messages,
            **inference_kwargs,
        )
        return completion.content[0].text

    def generate_stream(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        stream = self.client.messages.create(
            **messages,
            stream=True,
            **inference_kwargs,
        )

        for chunk in stream:
            if (
                chunk.type == "content_block_delta"
                and chunk.delta.type == "text_delta"
            ):
                yield chunk.delta.text

__init__(client, model_name=None)

Parameters:

client (Anthropic): An anthropic.Anthropic client. Required.
model_name (Optional[str]): The name of the model to use. Default: None.
Source code in outlines/models/anthropic.py
def __init__(
    self, client: "AnthropicClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client
        An `anthropic.Anthropic` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = AnthropicTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using Anthropic.

Parameters:

model_input (Union[str, Vision]): The prompt based on which the model will generate a response. Required.
output_type (Optional[Any]): As structured generation is not supported by Anthropic, the value of this argument must be None. Otherwise, an error will be raised at runtime. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the client. Default: {}.

Returns:

str: The response generated by the model.

Source code in outlines/models/anthropic.py
def generate(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if (
        "model" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model"] = self.model_name

    completion = self.client.messages.create(
        **messages,
        **inference_kwargs,
    )
    return completion.content[0].text

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using Anthropic.

Parameters:

model_input (Union[str, Vision]): The prompt based on which the model will generate a response. Required.
output_type (Optional[Any]): As structured generation is not supported by Anthropic, the value of this argument must be None. Otherwise, an error will be raised at runtime. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the client. Default: {}.

Returns:

Iterator[str]: An iterator that yields the text generated by the model.

Source code in outlines/models/anthropic.py
def generate_stream(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if (
        "model" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model"] = self.model_name

    stream = self.client.messages.create(
        **messages,
        stream=True,
        **inference_kwargs,
    )

    for chunk in stream:
        if (
            chunk.type == "content_block_delta"
            and chunk.delta.type == "text_delta"
        ):
            yield chunk.delta.text

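generate_stream is not meant to be called directly; streaming is usually driven through the model's stream method defined on the Model base class. A minimal sketch, assuming the from_anthropic helper documented below and an example model name:

import anthropic
import outlines

# Sketch only: the model name is an example and the anthropic client reads its
# API key from the ANTHROPIC_API_KEY environment variable.
client = anthropic.Anthropic()
model = outlines.from_anthropic(client, "claude-3-5-sonnet-latest")

# Anthropic's messages API requires `max_tokens`; it is forwarded as an
# inference kwarg to `messages.create`.
for chunk in model.stream("Write a haiku about the sea.", max_tokens=128):
    print(chunk, end="")
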
AnthropicTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Anthropic model.

AnthropicTypeAdapter is responsible for preparing the arguments to Anthropic's messages.create method: the input (prompt and possibly image). Anthropic does not support defining the output type, so format_output_type is not implemented.

Source code in outlines/models/anthropic.py
class AnthropicTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Anthropic` model.

    `AnthropicTypeAdapter` is responsible for preparing the arguments to
    Anthropic's `messages.create` method: the input (prompt and possibly
    image).
    Anthropic does not support defining the output type, so
    `format_output_type` is not implemented.

    """

    def format_input(self, model_input: Union[str, Vision]) -> dict:
        """Generate the `messages` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        dict
            The `messages` argument to pass to the client.

        """
        if isinstance(model_input, str):
            return self.format_str_model_input(model_input)
        elif isinstance(model_input, Vision):
            return self.format_vision_model_input(model_input)
        raise TypeError(
            f"The input type {input} is not available with Anthropic. "
            "The only available types are `str` and `Vision`."
        )

    def format_str_model_input(self, model_input: str) -> dict:
        return {
            "messages": [
                {
                    "role": "user",
                    "content": model_input,
                }
            ]
        }

    def format_vision_model_input(self, model_input: Vision) -> dict:
        return {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": model_input.image_format,
                                "data": model_input.image_str,
                            },
                        },
                        {"type": "text", "text": model_input.prompt},
                    ],
                }
            ]
        }

    def format_output_type(self, output_type):
        """Not implemented for Anthropic."""
        if output_type is None:
            return {}
        else:
            raise NotImplementedError(
                f"The output type {output_type} is not available with "
                "Anthropic."
            )

format_input(model_input)

Generate the messages argument to pass to the client.

Parameters:

model_input (Union[str, Vision]): The input provided by the user. Required.

Returns:

dict: The messages argument to pass to the client.

Source code in outlines/models/anthropic.py
def format_input(self, model_input: Union[str, Vision]) -> dict:
    """Generate the `messages` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    dict
        The `messages` argument to pass to the client.

    """
    if isinstance(model_input, str):
        return self.format_str_model_input(model_input)
    elif isinstance(model_input, Vision):
        return self.format_vision_model_input(model_input)
    raise TypeError(
        f"The input type {input} is not available with Anthropic. "
        "The only available types are `str` and `Vision`."
    )

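For illustration, here is the structure this method produces for a plain string prompt, following the format_str_model_input branch shown above:

from outlines.models.anthropic import AnthropicTypeAdapter

adapter = AnthropicTypeAdapter()
adapter.format_input("Hello")
# -> {'messages': [{'role': 'user', 'content': 'Hello'}]}
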
format_output_type(output_type)

Not implemented for Anthropic.

Source code in outlines/models/anthropic.py
def format_output_type(self, output_type):
    """Not implemented for Anthropic."""
    if output_type is None:
        return {}
    else:
        raise NotImplementedError(
            f"The output type {output_type} is not available with "
            "Anthropic."
        )

from_anthropic(client, model_name=None)

Create an Outlines Anthropic model instance from an anthropic.Anthropic client instance.

Parameters:

client (Anthropic): An anthropic.Anthropic client instance. Required.
model_name (Optional[str]): The name of the model to use. Default: None.

Returns:

Anthropic: An Outlines Anthropic model instance.

Source code in outlines/models/anthropic.py
def from_anthropic(
    client: "AnthropicClient", model_name: Optional[str] = None
) -> Anthropic:
    """Create an Outlines `Anthropic` model instance from an
    `anthropic.Anthropic` client instance.

    Parameters
    ----------
    client
        An `anthropic.Anthropic` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Anthropic
        An Outlines `Anthropic` model instance.

    """
    return Anthropic(client, model_name)

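A minimal usage sketch; the model name below is only an example and the API key is assumed to be set in the environment:

import anthropic
import outlines

client = anthropic.Anthropic()
model = outlines.from_anthropic(client, "claude-3-5-sonnet-latest")

# Structured outputs are not supported by Anthropic, so no output type is
# passed; `max_tokens` is required by Anthropic's messages API.
result = model("Give a one-sentence definition of entropy.", max_tokens=256)
print(result)
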
base

Base classes for all models and model type adapters.

AsyncModel

Bases: ABC

Base class for all asynchronous models.

This class defines a shared __call__ method that can be used to call the model directly. All models inheriting from this class must define a type_adapter attribute of type ModelTypeAdapter. The methods of the type_adapter attribute are used in the generate method to format the input and output types received by the model. Additionally, local models must define a tensor_library_name attribute.

Source code in outlines/models/base.py
class AsyncModel(ABC):
    """Base class for all asynchronous models.

    This class defines a shared `__call__` method that can be used to call the
    model directly.
    All models inheriting from this class must define a `type_adapter`
    attribute of type `ModelTypeAdapter`. The methods of the `type_adapter`
    attribute are used in the `generate` method to format the input and output
    types received by the model.
    Additionally, local models must define a `tensor_library_name` attribute.

    """
    type_adapter: ModelTypeAdapter
    tensor_library_name: str

    async def __call__(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Call the model.

        Users can call the model directly, in which case we will create a
        generator instance with the output type provided and call it.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        await generator("prompt")
        ```
        and
        ```python
        await model("prompt", Foo)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type)
        return await generator(model_input, **inference_kwargs)

    async def stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> AsyncIterator[Any]:
        """Stream a response from the model.

        Users can use the `stream` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `stream` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        async for chunk in generator("prompt"):
            print(chunk)
        ```
        and
        ```python
        async for chunk in model.stream("prompt", Foo):
            print(chunk)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        AsyncIterator[Any]
            A stream of responses from the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type)

        async for chunk in generator.stream(model_input, **inference_kwargs):  # type: ignore
            yield chunk

    @abstractmethod
    async def generate(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Generate a response from the model.

        The output_type argument contains a logits processor for local models
        while it contains a type (Json, Enum...) for the API-based models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        ...

    @abstractmethod
    async def generate_stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> AsyncIterator[Any]:
        """Generate a stream of responses from the model.

        The output_type argument contains a logits processor for local models
        while it contains a type (Json, Enum...) for the API-based models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        AsyncIterator[Any]
            A coroutine that will produce an async iterator of responses from the model.

        """
        ...

__call__(model_input, output_type=None, **inference_kwargs) async

Call the model.

Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent:

generator = Generator(model, Foo)
await generator("prompt")
and
await model("prompt", Foo)

Parameters:

model_input (Any): The input provided by the user. Required.
output_type (Optional[Any]): The output type provided by the user. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

Any: The response generated by the model.

Source code in outlines/models/base.py
async def __call__(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Call the model.

    Users can call the model directly, in which case we will create a
    generator instance with the output type provided and call it.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    await generator("prompt")
    ```
    and
    ```python
    await model("prompt", Foo)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type)
    return await generator(model_input, **inference_kwargs)

generate(model_input, output_type=None, **inference_kwargs) abstractmethod async

Generate a response from the model.

The output_type argument contains a logits processor for local models while it contains a type (Json, Enum...) for the API-based models. This method is not intended to be used directly by end users.

Parameters:

model_input (Any): The input provided by the user. Required.
output_type (Optional[Any]): The output type provided by the user. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

Any: The response generated by the model.

Source code in outlines/models/base.py
@abstractmethod
async def generate(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Generate a response from the model.

    The output_type argument contains a logits processor for local models
    while it contains a type (Json, Enum...) for the API-based models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    ...

generate_stream(model_input, output_type=None, **inference_kwargs) abstractmethod async

Generate a stream of responses from the model.

The output_type argument contains a logits processor for local models while it contains a type (Json, Enum...) for the API-based models. This method is not intended to be used directly by end users.

Parameters:

model_input (Any): The input provided by the user. Required.
output_type (Optional[Any]): The output type provided by the user. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

AsyncIterator[Any]: A coroutine that will produce an async iterator of responses from the model.

Source code in outlines/models/base.py
@abstractmethod
async def generate_stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> AsyncIterator[Any]:
    """Generate a stream of responses from the model.

    The output_type argument contains a logits processor for local models
    while it contains a type (Json, Enum...) for the API-based models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    AsyncIterator[Any]
        A coroutine that will produce an async iterator of responses from the model.

    """
    ...

stream(model_input, output_type=None, **inference_kwargs) async

Stream a response from the model.

Users can use the stream method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its stream method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
async for chunk in generator("prompt"):
    print(chunk)
and
async for chunk in model.stream("prompt", Foo):
    print(chunk)

Parameters:

model_input (Any): The input provided by the user. Required.
output_type (Optional[Any]): The output type provided by the user. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

AsyncIterator[Any]: A stream of responses from the model.

Source code in outlines/models/base.py
async def stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> AsyncIterator[Any]:
    """Stream a response from the model.

    Users can use the `stream` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `stream` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    async for chunk in generator("prompt"):
        print(chunk)
    ```
    and
    ```python
    async for chunk in model.stream("prompt", Foo):
        print(chunk)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    AsyncIterator[Any]
        A stream of responses from the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type)

    async for chunk in generator.stream(model_input, **inference_kwargs):  # type: ignore
        yield chunk

Model

Bases: ABC

Base class for all synchronous models.

This class defines a shared __call__ method that can be used to call the model directly. All models inheriting from this class must define a type_adapter attribute of type ModelTypeAdapter. The methods of the type_adapter attribute are used in the generate method to format the input and output types received by the model. Additionally, local models must define a tensor_library_name attribute.

Source code in outlines/models/base.py
class Model(ABC):
    """Base class for all synchronous models.

    This class defines a shared `__call__` method that can be used to call the
    model directly.
    All models inheriting from this class must define a `type_adapter`
    attribute of type `ModelTypeAdapter`. The methods of the `type_adapter`
    attribute are used in the `generate` method to format the input and output
    types received by the model.
    Additionally, local models must define a `tensor_library_name` attribute.

    """
    type_adapter: ModelTypeAdapter
    tensor_library_name: str

    def __call__(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Call the model.

        Users can call the model directly, in which case we will create a
        generator instance with the output type provided and call it.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        generator("prompt")
        ```
        and
        ```python
        model("prompt", Foo)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        from outlines import Generator

        return Generator(self, output_type)(model_input, **inference_kwargs)

    def stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Stream a response from the model.

        Users can use the `stream` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `stream` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        for chunk in generator("prompt"):
            print(chunk)
        ```
        and
        ```python
        for chunk in model.stream("prompt", Foo):
            print(chunk)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type)
        return generator.stream(model_input, **inference_kwargs) # type: ignore

    @abstractmethod
    def generate(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Generate a response from the model.

        The output_type argument contains a logits processor for local models
        while it contains a type (Json, Enum...) for the API-based models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        ...

    @abstractmethod
    def generate_stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Generate a stream of responses from the model.

        The output_type argument contains a logits processor for local models
        while it contains a type (Json, Enum...) for the API-based models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        ...

__call__(model_input, output_type=None, **inference_kwargs)

Call the model.

Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent:

generator = Generator(model, Foo)
generator("prompt")
and
model("prompt", Foo)

Parameters:

model_input (Any): The input provided by the user. Required.
output_type (Optional[Any]): The output type provided by the user. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

Any: The response generated by the model.

Source code in outlines/models/base.py
def __call__(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Call the model.

    Users can call the model directly, in which case we will create a
    generator instance with the output type provided and call it.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    generator("prompt")
    ```
    and
    ```python
    model("prompt", Foo)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    from outlines import Generator

    return Generator(self, output_type)(model_input, **inference_kwargs)

generate(model_input, output_type=None, **inference_kwargs) abstractmethod

Generate a response from the model.

The output_type argument contains a logits processor for local models while it contains a type (Json, Enum...) for the API-based models. This method is not intended to be used directly by end users.

Parameters:

model_input (Any): The input provided by the user. Required.
output_type (Optional[Any]): The output type provided by the user. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

Any: The response generated by the model.

Source code in outlines/models/base.py
@abstractmethod
def generate(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Generate a response from the model.

    The output_type argument contains a logits processor for local models
    while it contains a type (Json, Enum...) for the API-based models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    ...

generate_stream(model_input, output_type=None, **inference_kwargs) abstractmethod

Generate a stream of responses from the model.

The output_type argument contains a logits processor for local models while it contains a type (Json, Enum...) for the API-based models. This method is not intended to be used directly by end users.

Parameters:

model_input (Any): The input provided by the user. Required.
output_type (Optional[Any]): The output type provided by the user. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

Iterator[Any]: A stream of responses from the model.

Source code in outlines/models/base.py
@abstractmethod
def generate_stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Generate a stream of responses from the model.

    The output_type argument contains a logits processor for local models
    while it contains a type (Json, Enum...) for the API-based models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    ...

stream(model_input, output_type=None, **inference_kwargs)

Stream a response from the model.

Users can use the stream method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its stream method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
for chunk in generator("prompt"):
    print(chunk)
and
for chunk in model.stream("prompt", Foo):
    print(chunk)

Parameters:

model_input (Any): The input provided by the user. Required.
output_type (Optional[Any]): The output type provided by the user. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

Iterator[Any]: A stream of responses from the model.

Source code in outlines/models/base.py
def stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Stream a response from the model.

    Users can use the `stream` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `stream` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    for chunk in generator("prompt"):
        print(chunk)
    ```
    and
    ```python
    for chunk in model.stream("prompt", Foo):
        print(chunk)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type)
    return generator.stream(model_input, **inference_kwargs) # type: ignore

ModelTypeAdapter

Bases: ABC

Base class for all model type adapters.

A type adapter instance must be given as a value to the type_adapter attribute when instantiating a model. The type adapter is responsible for formatting the input and output types passed to the model to match the specific format expected by the associated model.

Source code in outlines/models/base.py
class ModelTypeAdapter(ABC):
    """Base class for all model type adapters.

    A type adapter instance must be given as a value to the `type_adapter`
    attribute when instantiating a model.
    The type adapter is responsible for formatting the input and output types
    passed to the model to match the specific format expected by the
    associated model.

    """

    @abstractmethod
    def format_input(self, model_input: Any) -> Any:
        """Format the user input to the expected format of the model.

        For API-based models, it typically means creating the `messages`
        argument passed to the client. For local models, it can mean casting
        the input from str to list for instance.
        This method is also used to validate that the input type provided by
        the user is supported by the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        Any
            The formatted input to be passed to the model.

        """
        ...

    @abstractmethod
    def format_output_type(self, output_type: Optional[Any] = None) -> Any:
        """Format the output type to the expected format of the model.

        For API-based models, this typically means creating a `response_format`
        argument. For local models, it means formatting the logits processor to
        create the object type expected by the model.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        Any
            The formatted output type to be passed to the model.

        """
        ...

format_input(model_input) abstractmethod

Format the user input to the expected format of the model.

For API-based models, it typically means creating the messages argument passed to the client. For local models, it can mean casting the input from str to list for instance. This method is also used to validate that the input type provided by the user is supported by the model.

Parameters:

model_input (Any): The input provided by the user. Required.

Returns:

Any: The formatted input to be passed to the model.

Source code in outlines/models/base.py
@abstractmethod
def format_input(self, model_input: Any) -> Any:
    """Format the user input to the expected format of the model.

    For API-based models, it typically means creating the `messages`
    argument passed to the client. For local models, it can mean casting
    the input from str to list for instance.
    This method is also used to validate that the input type provided by
    the user is supported by the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    Any
        The formatted input to be passed to the model.

    """
    ...

format_output_type(output_type=None) abstractmethod

Format the output type to the expected format of the model.

For API-based models, this typically means creating a response_format argument. For local models, it means formatting the logits processor to create the object type expected by the model.

Parameters:

output_type (Optional[Any]): The output type provided by the user. Default: None.

Returns:

Any: The formatted output type to be passed to the model.

Source code in outlines/models/base.py
@abstractmethod
def format_output_type(self, output_type: Optional[Any] = None) -> Any:
    """Format the output type to the expected format of the model.

    For API-based models, this typically means creating a `response_format`
    argument. For local models, it means formatting the logits processor to
    create the object type expected by the model.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    Any
        The formatted output type to be passed to the model.

    """
    ...

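To make the contract concrete, here is a hypothetical sketch of a minimal synchronous model and its type adapter. The EchoModel and EchoTypeAdapter names are invented for illustration and are not part of the library; the sketch only shows how the abstract methods fit together.

from typing import Any, Iterator, Optional

from outlines.models.base import Model, ModelTypeAdapter


class EchoTypeAdapter(ModelTypeAdapter):
    def format_input(self, model_input: Any) -> str:
        # Validate and normalize the user input.
        if isinstance(model_input, str):
            return model_input
        raise TypeError("EchoModel only accepts `str` inputs.")

    def format_output_type(self, output_type: Optional[Any] = None) -> None:
        # This toy model does not support structured outputs.
        if output_type is not None:
            raise NotImplementedError("EchoModel does not support output types.")
        return None


class EchoModel(Model):
    def __init__(self):
        self.type_adapter = EchoTypeAdapter()

    def generate(self, model_input, output_type=None, **inference_kwargs) -> str:
        prompt = self.type_adapter.format_input(model_input)
        self.type_adapter.format_output_type(output_type)
        return prompt  # echo the prompt back

    def generate_stream(self, model_input, output_type=None, **inference_kwargs) -> Iterator[str]:
        yield self.generate(model_input, output_type, **inference_kwargs)
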
dottxt

Integration with Dottxt's API.

Dottxt

Bases: Model

Thin wrapper around the dottxt.client.Dottxt client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the dottxt.client.Dottxt client.

Source code in outlines/models/dottxt.py
class Dottxt(Model):
    """Thin wrapper around the `dottxt.client.Dottxt` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `dottxt.client.Dottxt` client.

    """

    def __init__(
        self,
        client: "DottxtClient",
        model_name: Optional[str] = None,
        model_revision: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            A `dottxt.Dottxt` client.
        model_name
            The name of the model to use.
        model_revision
            The revision of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.model_revision = model_revision
        self.type_adapter = DottxtTypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Dottxt.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        prompt = self.type_adapter.format_input(model_input)
        json_schema = self.type_adapter.format_output_type(output_type)

        if (
            "model_name" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model_name"] = self.model_name

        if (
            "model_revision" not in inference_kwargs
            and self.model_revision is not None
        ):
            inference_kwargs["model_revision"] = self.model_revision

        completion = self.client.json(
            prompt,
            json_schema,
            **inference_kwargs,
        )
        return completion.data

    def generate_stream(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        """Not available for Dottxt."""
        raise NotImplementedError(
            "Dottxt does not support streaming. Call the model/generator for "
            + "regular generation instead."
        )

__init__(client, model_name=None, model_revision=None)

Parameters:

client (Dottxt): A dottxt.Dottxt client. Required.
model_name (Optional[str]): The name of the model to use. Default: None.
model_revision (Optional[str]): The revision of the model to use. Default: None.
Source code in outlines/models/dottxt.py
def __init__(
    self,
    client: "DottxtClient",
    model_name: Optional[str] = None,
    model_revision: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        A `dottxt.Dottxt` client.
    model_name
        The name of the model to use.
    model_revision
        The revision of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.model_revision = model_revision
    self.type_adapter = DottxtTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using Dottxt.

Parameters:

model_input (str): The prompt based on which the model will generate a response. Required.
output_type (Optional[Any]): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. Default: None.
**inference_kwargs (Any): Additional keyword arguments to pass to the client. Default: {}.

Returns:

str: The text generated by the model.

Source code in outlines/models/dottxt.py
def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Dottxt.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    prompt = self.type_adapter.format_input(model_input)
    json_schema = self.type_adapter.format_output_type(output_type)

    if (
        "model_name" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model_name"] = self.model_name

    if (
        "model_revision" not in inference_kwargs
        and self.model_revision is not None
    ):
        inference_kwargs["model_revision"] = self.model_revision

    completion = self.client.json(
        prompt,
        json_schema,
        **inference_kwargs,
    )
    return completion.data

generate_stream(model_input, output_type=None, **inference_kwargs)

Not available for Dottxt.

Source code in outlines/models/dottxt.py
def generate_stream(
    self,
    model_input,
    output_type=None,
    **inference_kwargs,
):
    """Not available for Dottxt."""
    raise NotImplementedError(
        "Dottxt does not support streaming. Call the model/generator for "
        + "regular generation instead."
    )

DottxtTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Dottxt model.

Source code in outlines/models/dottxt.py
class DottxtTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Dottxt` model."""

    def format_input(self, model_input: str) -> str:
        """Format the prompt to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The input to pass to the client.

        """
        if isinstance(model_input, str):
            return model_input
        raise TypeError(
            f"The input type {model_input} is not available with Dottxt. "
            "The only available type is `str`."
        )

    def format_output_type(self, output_type: Optional[Any] = None) -> str:
        """Format the output type to pass to the client.

        TODO: `int`, `float` and other Python types could be supported via
        JSON Schema.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        str
            The output type to pass to the client.

        """
        # Unsupported output types
        if output_type is None:
            raise TypeError(
                "You must provide an output type. Dottxt only supports "
                "constrained generation."
            )
        elif isinstance(output_type, Regex):
            raise TypeError(
                "Regex-based structured outputs will soon be available with "
                "Dottxt. Use an open source model in the meantime."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs will soon be available with "
                "Dottxt. Use an open source model in the meantime."
            )

        elif isinstance(output_type, JsonSchema):
            return output_type.schema
        elif is_dataclass(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return json.dumps(schema)
        elif is_typed_dict(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return json.dumps(schema)
        elif is_pydantic_model(output_type):
            schema = output_type.model_json_schema()
            return json.dumps(schema)
        elif is_genson_schema_builder(output_type):
            return output_type.to_json()
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by Dottxt. "
                "Consider using a local mode instead."
            )

format_input(model_input)

Format the prompt to pass to the client.

Parameters:

model_input (str): The input provided by the user. Required.

Returns:

str: The input to pass to the client.

Source code in outlines/models/dottxt.py
def format_input(self, model_input: str) -> str:
    """Format the prompt to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The input to pass to the client.

    """
    if isinstance(model_input, str):
        return model_input
    raise TypeError(
        f"The input type {model_input} is not available with Dottxt. "
        "The only available type is `str`."
    )

format_output_type(output_type=None)

Format the output type to pass to the client.

TODO: int, float and other Python types could be supported via JSON Schema.

Parameters:

output_type (Optional[Any]): The output type provided by the user. Default: None.

Returns:

str: The output type to pass to the client.

Source code in outlines/models/dottxt.py
def format_output_type(self, output_type: Optional[Any] = None) -> str:
    """Format the output type to pass to the client.

    TODO: `int`, `float` and other Python types could be supported via
    JSON Schema.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    str
        The output type to pass to the client.

    """
    # Unsupported output types
    if output_type is None:
        raise TypeError(
            "You must provide an output type. Dottxt only supports "
            "constrained generation."
        )
    elif isinstance(output_type, Regex):
        raise TypeError(
            "Regex-based structured outputs will soon be available with "
            "Dottxt. Use an open source model in the meantime."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs will soon be available with "
            "Dottxt. Use an open source model in the meantime."
        )

    elif isinstance(output_type, JsonSchema):
        return output_type.schema
    elif is_dataclass(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return json.dumps(schema)
    elif is_typed_dict(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return json.dumps(schema)
    elif is_pydantic_model(output_type):
        schema = output_type.model_json_schema()
        return json.dumps(schema)
    elif is_genson_schema_builder(output_type):
        return output_type.to_json()
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by Dottxt. "
            "Consider using a local mode instead."
        )

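For illustration, passing a Pydantic model runs through the is_pydantic_model branch above and returns its JSON schema serialized as a string:

from pydantic import BaseModel

from outlines.models.dottxt import DottxtTypeAdapter


class Character(BaseModel):
    name: str
    age: int


adapter = DottxtTypeAdapter()
schema_string = adapter.format_output_type(Character)
# A JSON string such as
# '{"properties": {"name": {...}, "age": {...}}, "required": ["name", "age"], ...}'
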
from_dottxt(client, model_name=None, model_revision=None)

Create an Outlines Dottxt model instance from a dottxt.Dottxt client instance.

Parameters:

client (Dottxt): A dottxt.Dottxt client instance. Required.
model_name (Optional[str]): The name of the model to use. Default: None.
model_revision (Optional[str]): The revision of the model to use. Default: None.

Returns:

Dottxt: An Outlines Dottxt model instance.

Source code in outlines/models/dottxt.py
def from_dottxt(
    client: "DottxtClient",
    model_name: Optional[str] = None,
    model_revision: Optional[str] = None,
) -> Dottxt:
    """Create an Outlines `Dottxt` model instance from a `dottxt.Dottxt`
    client instance.

    Parameters
    ----------
    client
        A `dottxt.Dottxt` client instance.
    model_name
        The name of the model to use.
    model_revision
        The revision of the model to use.

    Returns
    -------
    Dottxt
        An Outlines `Dottxt` model instance.

    """
    return Dottxt(client, model_name, model_revision)

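A minimal usage sketch; it assumes the Dottxt API key is configured for the client (for example through the environment):

from pydantic import BaseModel
from dottxt.client import Dottxt
import outlines


class Character(BaseModel):
    name: str
    age: int


client = Dottxt()
model = outlines.from_dottxt(client)

# Dottxt only supports constrained generation, so an output type is required.
result = model("Create a character for my game.", Character)
print(result)  # text constrained to the Character JSON schema
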
gemini

Integration with Gemini's API.

Gemini

Bases: Model

Thin wrapper around the google.genai.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the google.genai.Client client.

Source code in outlines/models/gemini.py
class Gemini(Model):
    """Thin wrapper around the `google.genai.Client` client.

    This wrapper is used to convert the input and output types specified by
    the users at a higher level to arguments to the `google.genai.Client`
    client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            A `google.genai.Client` instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = GeminiTypeAdapter()

    def generate(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> str:
        """Generate a response from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        completion = self.client.models.generate_content(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs}
        )

        return completion.text

    def generate_stream(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Generate a stream of responses from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        stream = self.client.models.generate_content_stream(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs},
        )

        for chunk in stream:
            if hasattr(chunk, "text") and chunk.text:
                yield chunk.text

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Client

A google.genai.Client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/gemini.py
def __init__(self, client: "Client", model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = GeminiTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate a response from the model.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type.

None
**inference_kwargs

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The response generated by the model.

Source code in outlines/models/gemini.py
def generate(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> str:
    """Generate a response from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    generation_config = self.type_adapter.format_output_type(output_type)

    completion = self.client.models.generate_content(
        **contents,
        model=inference_kwargs.pop("model", self.model_name),
        config={**generation_config, **inference_kwargs}
    )

    return completion.text

generate_stream(model_input, output_type=None, **inference_kwargs)

Generate a stream of responses from the model.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type.

None
**inference_kwargs

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/gemini.py
def generate_stream(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Generate a stream of responses from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    generation_config = self.type_adapter.format_output_type(output_type)

    stream = self.client.models.generate_content_stream(
        **contents,
        model=inference_kwargs.pop("model", self.model_name),
        config={**generation_config, **inference_kwargs},
    )

    for chunk in stream:
        if hasattr(chunk, "text") and chunk.text:
            yield chunk.text
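
A brief streaming sketch (not part of the source; the client construction and the model name are example assumptions):

from google import genai

from outlines import from_gemini  # assumed to be re-exported at the package root

client = genai.Client()  # assumed to read the API key from the environment
model = from_gemini(client, "gemini-1.5-flash")  # example model name

for chunk in model.generate_stream("Tell me a one-sentence story about a robot."):
    print(chunk, end="")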

GeminiTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Gemini model.

GeminiTypeAdapter is responsible for preparing the arguments to Gemini's client models.generate_content method: the input (prompt and possibly image), as well as the output type (either JSON or multiple choice).

Source code in outlines/models/gemini.py
class GeminiTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Gemini` model.

    `GeminiTypeAdapter` is responsible for preparing the arguments to Gemini's
    client `models.generate_content` method: the input (prompt and possibly
    image), as well as the output type (either JSON or multiple choice).

    """

    def format_input(self, model_input: Union[str, Vision]) -> dict:
        """Generate the `contents` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        dict
            The `contents` argument to pass to the client.

        """
        if isinstance(model_input, str):
            return {"contents": [model_input]}
        elif isinstance(model_input, Vision):
            from google.genai import types

            image_part = types.Part.from_bytes(
                data=model_input.image_str,
                mime_type=model_input.image_format
            )
            return {"contents": [model_input.prompt, image_part]}
        else:
            raise TypeError(
                f"The input type {input} is not available with Gemini. "
                "The only available types are `str` and `Vision`."
            )

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the `generation_config` argument to pass to the client.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        dict
            The `generation_config` argument to pass to the client.

        """

        # Unsupported output types
        if isinstance(output_type, Regex):
            raise TypeError(
                "Neither regex-based structured outputs nor the `pattern` "
                "keyword in Json Schema are available with Gemini. Use an "
                "open source model or dottxt instead."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not available with Gemini. "
                "Use an open source model or dottxt instead."
            )
        elif is_genson_schema_builder(output_type):
            raise TypeError(
                "The Gemini SDK does not accept Genson schema builders as an "
                "input. Pass a Pydantic model, typed dict or dataclass "
                "instead."
            )
        elif isinstance(output_type, JsonSchema):
            raise TypeError(
                "The Gemini SDK does not accept Json Schemas as an input. "
                "Pass a Pydantic model, typed dict or dataclass instead."
            )

        if output_type is None:
            return {}

        # Structured types
        elif is_dataclass(output_type):
            return self.format_json_output_type(output_type)
        elif is_typed_dict(output_type):
            return self.format_json_output_type(output_type)
        elif is_pydantic_model(output_type):
            return self.format_json_output_type(output_type)

        # List of structured types
        elif is_typing_list(output_type):
            return self.format_list_output_type(output_type)

        # Multiple choice types
        elif is_enum(output_type):
            return self.format_enum_output_type(output_type)
        elif is_literal(output_type):
            enum = get_enum_from_literal(output_type)
            return self.format_enum_output_type(enum)

        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by Gemini. "
                "Consider using a local model or dottxt instead."
            )

    def format_enum_output_type(self, output_type: Optional[Any]) -> dict:
        return {
            "response_mime_type": "text/x.enum",
            "response_schema": output_type,
        }

    def format_json_output_type(self, output_type: Optional[Any]) -> dict:
        return {
            "response_mime_type": "application/json",
            "response_schema": output_type,
        }

    def format_list_output_type(self, output_type: Optional[Any]) -> dict:
        args = get_args(output_type)

        if len(args) == 1:
            item_type = args[0]

            # Check if list item type is supported
            if (
                is_pydantic_model(item_type)
                or is_typed_dict(item_type)
                or is_dataclass(item_type)
            ):
                return {
                    "response_mime_type": "application/json",
                    "response_schema": output_type,
                }

            else:
                raise TypeError(
                    "The only supported types for list items are Pydantic "
                    + "models, typed dicts and dataclasses."
                )

        raise TypeError(
            f"Gemini only supports homogeneous lists: "
            "list[BaseModel], list[TypedDict] or list[dataclass]. "
            f"Got {output_type} instead."
        )

format_input(model_input)

Generate the contents argument to pass to the client.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The input provided by the user.

required

Returns:

Type Description
dict

The contents argument to pass to the client.

Source code in outlines/models/gemini.py
def format_input(self, model_input: Union[str, Vision]) -> dict:
    """Generate the `contents` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    dict
        The `contents` argument to pass to the client.

    """
    if isinstance(model_input, str):
        return {"contents": [model_input]}
    elif isinstance(model_input, Vision):
        from google.genai import types

        image_part = types.Part.from_bytes(
            data=model_input.image_str,
            mime_type=model_input.image_format
        )
        return {"contents": [model_input.prompt, image_part]}
    else:
        raise TypeError(
            f"The input type {input} is not available with Gemini. "
            "The only available types are `str` and `Vision`."
        )

format_output_type(output_type=None)

Generate the generation_config argument to pass to the client.

Parameters:

Name Type Description Default
output_type Optional[Any]

The output type provided by the user.

None

Returns:

Type Description
dict

The generation_config argument to pass to the client.

Source code in outlines/models/gemini.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the `generation_config` argument to pass to the client.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    dict
        The `generation_config` argument to pass to the client.

    """

    # Unsupported output types
    if isinstance(output_type, Regex):
        raise TypeError(
            "Neither regex-based structured outputs nor the `pattern` "
            "keyword in Json Schema are available with Gemini. Use an "
            "open source model or dottxt instead."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not available with Gemini. "
            "Use an open source model or dottxt instead."
        )
    elif is_genson_schema_builder(output_type):
        raise TypeError(
            "The Gemini SDK does not accept Genson schema builders as an "
            "input. Pass a Pydantic model, typed dict or dataclass "
            "instead."
        )
    elif isinstance(output_type, JsonSchema):
        raise TypeError(
            "The Gemini SDK does not accept Json Schemas as an input. "
            "Pass a Pydantic model, typed dict or dataclass instead."
        )

    if output_type is None:
        return {}

    # Structured types
    elif is_dataclass(output_type):
        return self.format_json_output_type(output_type)
    elif is_typed_dict(output_type):
        return self.format_json_output_type(output_type)
    elif is_pydantic_model(output_type):
        return self.format_json_output_type(output_type)

    # List of structured types
    elif is_typing_list(output_type):
        return self.format_list_output_type(output_type)

    # Multiple choice types
    elif is_enum(output_type):
        return self.format_enum_output_type(output_type)
    elif is_literal(output_type):
        enum = get_enum_from_literal(output_type)
        return self.format_enum_output_type(enum)

    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by Gemini. "
            "Consider using a local model or dottxt instead."
        )

from_gemini(client, model_name=None)

Create an Outlines Gemini model instance from a google.genai.Client instance.

Parameters:

Name Type Description Default
client Client

A google.genai.Client instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Gemini

An Outlines Gemini model instance.

Source code in outlines/models/gemini.py
def from_gemini(client: "Client", model_name: Optional[str] = None) -> Gemini:
    """Create an Outlines `Gemini` model instance from a
    `google.genai.Client` instance.

    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Gemini
        An Outlines `Gemini` model instance.

    """
    return Gemini(client, model_name)
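
Example usage (a hedged sketch; the model name is an example and the client is assumed to read its API key from the environment):

from google import genai
from pydantic import BaseModel

from outlines import from_gemini  # assumed to be re-exported at the package root


class Character(BaseModel):
    name: str
    age: int


client = genai.Client()
model = from_gemini(client, "gemini-1.5-flash")  # example model name

# The output type is turned into a `response_schema` by `GeminiTypeAdapter`
json_string = model.generate("Create a character.", output_type=Character)
print(Character.model_validate_json(json_string))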

llamacpp

Integration with the llama-cpp-python library.

LlamaCpp

Bases: Model

Thin wrapper around the llama_cpp.Llama model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the llama_cpp.Llama model.

Source code in outlines/models/llamacpp.py
class LlamaCpp(Model):
    """Thin wrapper around the `llama_cpp.Llama` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `llama_cpp.Llama` model.
    """

    tensor_library_name = "numpy"

    def __init__(self, model: "Llama"):
        """
        Parameters
        ----------
        model
            A `llama_cpp.Llama` model instance.

        """
        self.model = model
        self.tokenizer = LlamaCppTokenizer(self.model)
        self.type_adapter = LlamaCppTypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        if isinstance(output_type, CFGLogitsProcessor):
            raise NotImplementedError(
                "CFG generation is not supported for LlamaCpp due to bug in "
                "the llama_cpp tokenizer"
            )

        completion = self.model(
            self.type_adapter.format_input(model_input),
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        result = completion["choices"][0]["text"]

        self.model.reset()

        return result

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if isinstance(output_type, CFGLogitsProcessor):
            raise NotImplementedError(
                "CFG generation is not supported for LlamaCpp due to bug in "
                "the llama_cpp tokenizer"
            )

        generator = self.model(
            self.type_adapter.format_input(model_input),
            logits_processor=self.type_adapter.format_output_type(output_type),
            stream=True,
            **inference_kwargs,
        )

        def token_generator() -> Iterator[str]:
            while True:
                try:
                    result = next(generator)
                    yield result["choices"][0]["text"]
                except StopIteration:
                    self.model.reset()
                    return

        return token_generator()

    def load_lora(self, adapter_path: str) -> None:  # pragma: no cover
        """Load a LoRA adapter. Deprecated since v1.0.0."""
        warnings.warn("""
            The `load_lora` method is deprecated starting from v1.0.0.
            Support for it will be removed in v1.1.0.
            """,
            DeprecationWarning,
            stacklevel=2,
        )
        if self.model._model.apply_lora_from_file(
            adapter_path,
            1.0,
        ):
            raise RuntimeError(
                f"Failed to apply LoRA from lora path: {adapter_path}"
            )

__init__(model)

Parameters:

Name Type Description Default
model Llama

A llama_cpp.Llama model instance.

required
Source code in outlines/models/llamacpp.py
def __init__(self, model: "Llama"):
    """
    Parameters
    ----------
    model
        A `llama_cpp.Llama` model instance.

    """
    self.model = model
    self.tokenizer = LlamaCppTokenizer(self.model)
    self.type_adapter = LlamaCppTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using llama-cpp-python.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
**inference_kwargs Any

Additional keyword arguments to pass to the Llama.__call__ method of the llama-cpp-python library.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/llamacpp.py
def generate(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    if isinstance(output_type, CFGLogitsProcessor):
        raise NotImplementedError(
            "CFG generation is not supported for LlamaCpp due to bug in "
            "the llama_cpp tokenizer"
        )

    completion = self.model(
        self.type_adapter.format_input(model_input),
        logits_processor=self.type_adapter.format_output_type(output_type),
        **inference_kwargs,
    )
    result = completion["choices"][0]["text"]

    self.model.reset()

    return result

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using llama-cpp-python.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
**inference_kwargs Any

Additional keyword arguments to pass to the Llama.__call__ method of the llama-cpp-python library.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/llamacpp.py
def generate_stream(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if isinstance(output_type, CFGLogitsProcessor):
        raise NotImplementedError(
            "CFG generation is not supported for LlamaCpp due to bug in "
            "the llama_cpp tokenizer"
        )

    generator = self.model(
        self.type_adapter.format_input(model_input),
        logits_processor=self.type_adapter.format_output_type(output_type),
        stream=True,
        **inference_kwargs,
    )

    def token_generator() -> Iterator[str]:
        while True:
            try:
                result = next(generator)
                yield result["choices"][0]["text"]
            except StopIteration:
                self.model.reset()
                return

    return token_generator()
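
A brief streaming sketch (not part of the source; the GGUF path is a placeholder):

from llama_cpp import Llama

from outlines import from_llamacpp  # assumed to be re-exported at the package root

model = from_llamacpp(Llama("path/to/model.gguf"))  # placeholder path to a local GGUF file

for token in model.generate_stream("Once upon a time", max_tokens=32):
    print(token, end="")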

load_lora(adapter_path)

Load a LoRA adapter. Deprecated since v1.0.0.

Source code in outlines/models/llamacpp.py
def load_lora(self, adapter_path: str) -> None:  # pragma: no cover
    """Load a LoRA adapter. Deprecated since v1.0.0."""
    warnings.warn("""
        The `load_lora` method is deprecated starting from v1.0.0.
        Support for it will be removed in v1.1.0.
        """,
        DeprecationWarning,
        stacklevel=2,
    )
    if self.model._model.apply_lora_from_file(
        adapter_path,
        1.0,
    ):
        raise RuntimeError(
            f"Failed to apply LoRA from lora path: {adapter_path}"
        )

LlamaCppTokenizer

Bases: Tokenizer

Source code in outlines/models/llamacpp.py
class LlamaCppTokenizer(Tokenizer):
    def __init__(self, model: "Llama"):
        self.eos_token_id = model.token_eos()
        self.eos_token = model.tokenizer().decode([self.eos_token_id])
        self.pad_token_id = self.eos_token_id
        self.special_tokens: Set[str] = set()

        self.vocabulary: Dict[str, int] = dict()

        self.tokenizer = model.tokenizer()

        # TODO: Remove when https://github.com/ggerganov/llama.cpp/pull/5613
        # is resolved
        self._hf_tokenizer = None
        try:
            self.vocabulary = model.tokenizer_.hf_tokenizer.get_vocab()
            self._hf_tokenizer = model.tokenizer_.hf_tokenizer
        except AttributeError:
            # fall back to building the vocabulary by decoding each token id
            for t in range(model.n_vocab()):
                token_piece = model.tokenizer().decode([t])
                self.vocabulary[token_piece] = t

        # ensure stable ordering of vocabulary
        self.vocabulary = {
            tok: tok_id
            for tok, tok_id
            in sorted(self.vocabulary.items(), key=lambda x: x[1])
        }

        self._hash = None

    def decode(self, token_ids: List[int]) -> List[str]:
        decoded_bytes = self.tokenizer.detokenize(token_ids)
        return [decoded_bytes.decode("utf-8", errors="ignore")]

    def encode(
        self,
        prompt: Union[str, List[str]],
        add_bos: bool = True,
        special: bool = True,
    ) -> Tuple[List[int], List[int]]:
        if isinstance(prompt, list):
            raise NotImplementedError(
                "llama-cpp-python tokenizer doesn't support batch tokenization"
            )
        token_ids = self.tokenizer.tokenize(
            prompt.encode("utf-8", errors="ignore"),
            add_bos=add_bos,
            special=special,
        )
        # generate attention mask, missing from llama-cpp-python
        attention_mask = [
            1 if token_id != self.pad_token_id else 0 for token_id in token_ids
        ]
        return token_ids, attention_mask

    def convert_token_to_string(self, token: str) -> str:
        if self._hf_tokenizer is not None:
            from transformers.file_utils import SPIECE_UNDERLINE

            token_str = self._hf_tokenizer.convert_tokens_to_string([token])
            if (
                token.startswith(SPIECE_UNDERLINE)
                or token == "<0x20>"
            ):  # pragma: no cover
                token_str = " " + token_str
            return token_str
        else:
            return token

    def __eq__(self, other):
        if not isinstance(other, LlamaCppTokenizer):
            return False
        return self.__getstate__() == other.__getstate__()

    def __hash__(self):
        # We create a custom hash as pickle.dumps(self) is not stable
        if self._hash is None:
            self._hash = hash((
                tuple(sorted(self.vocabulary.items())),
                self.eos_token_id,
                self.eos_token,
                self.pad_token_id,
                tuple(sorted(self.special_tokens)),
            ))
        return self._hash

    def __getstate__(self):
        """Create a stable representation for outlines.caching"""
        return (
            self.vocabulary,
            self.eos_token_id,
            self.eos_token,
            self.pad_token_id,
            sorted(self.special_tokens),
        )

    def __setstate__(self, state):
        raise NotImplementedError("Cannot load a pickled llamacpp tokenizer")

__getstate__()

Create a stable representation for outlines.caching

Source code in outlines/models/llamacpp.py
def __getstate__(self):
    """Create a stable representation for outlines.caching"""
    return (
        self.vocabulary,
        self.eos_token_id,
        self.eos_token,
        self.pad_token_id,
        sorted(self.special_tokens),
    )

LlamaCppTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the LlamaCpp model.

LlamaCppTypeAdapter is responsible for preparing the arguments to llama-cpp-python's Llama.__call__ method: the input (a string prompt), as well as the logits processor (an instance of LogitsProcessorList).

Source code in outlines/models/llamacpp.py
class LlamaCppTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `LlamaCpp` model.

    `LlamaCppTypeAdapter` is responsible for preparing the arguments to
    `llama-cpp-python`'s `Llama.__call__` method: the input (a string prompt),
    as well as the logits processor (an instance of `LogitsProcessorList`).

    """

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {input} is not available. "
            "The `llama-cpp-python` library does not support batch inference. "
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        return model_input

    def format_output_type(
        self, output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> "LogitsProcessorList":
        """Generate the logits processor argument to pass to the model.

        Parameters
        ----------
        output_type
            The logits processor provided.

        Returns
        -------
        LogitsProcessorList
            The logits processor to pass to the model.

        """
        from llama_cpp import LogitsProcessorList

        return LogitsProcessorList([output_type])

format_input(model_input)

Generate the prompt argument to pass to the model.

Parameters:

Name Type Description Default
model_input

The input provided by the user.

required

Returns:

Type Description
str

The formatted input to be passed to the model.

Source code in outlines/models/llamacpp.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {input} is not available. "
        "The `llama-cpp-python` library does not support batch inference. "
    )

format_output_type(output_type=None)

Generate the logits processor argument to pass to the model.

Parameters:

Name Type Description Default
output_type Optional[OutlinesLogitsProcessor]

The logits processor provided.

None

Returns:

Type Description
LogitsProcessorList

The logits processor to pass to the model.

Source code in outlines/models/llamacpp.py
def format_output_type(
    self, output_type: Optional[OutlinesLogitsProcessor] = None,
) -> "LogitsProcessorList":
    """Generate the logits processor argument to pass to the model.

    Parameters
    ----------
    output_type
        The logits processor provided.

    Returns
    -------
    LogitsProcessorList
        The logits processor to pass to the model.

    """
    from llama_cpp import LogitsProcessorList

    return LogitsProcessorList([output_type])

from_llamacpp(model)

Create an Outlines LlamaCpp model instance from a llama_cpp.Llama instance.

Parameters:

Name Type Description Default
model Llama

A llama_cpp.Llama instance.

required

Returns:

Type Description
LlamaCpp

An Outlines LlamaCpp model instance.

Source code in outlines/models/llamacpp.py
def from_llamacpp(model: "Llama"):
    """Create an Outlines `LlamaCpp` model instance from a
    `llama_cpp.Llama` instance.

    Parameters
    ----------
    model
        A `llama_cpp.Llama` instance.

    Returns
    -------
    LlamaCpp
        An Outlines `LlamaCpp` model instance.

    """
    return LlamaCpp(model)
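
Example usage (a hedged sketch; the GGUF path and sampling arguments are placeholders):

from llama_cpp import Llama

from outlines import from_llamacpp  # assumed to be re-exported at the package root

llama = Llama("path/to/model.gguf")  # placeholder path to a local GGUF file
model = from_llamacpp(llama)

# `inference_kwargs` such as `max_tokens` are forwarded to `Llama.__call__`
print(model.generate("Question: What is 2 + 2?\nAnswer:", max_tokens=16))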

mlxlm

Integration with the mlx_lm library.

MLXLM

Bases: Model

Thin wrapper around an mlx_lm model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the mlx_lm library.

Source code in outlines/models/mlxlm.py
class MLXLM(Model):
    """Thin wrapper around an `mlx_lm` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `mlx_lm` library.

    """

    tensor_library_name = "mlx"

    def __init__(
        self,
        model: "nn.Module",
        tokenizer: "PreTrainedTokenizer",
    ):
        """
        Parameters
        ----------
        model
            An instance of an `mlx_lm` model.
        tokenizer
            An instance of an `mlx_lm` tokenizer or of a compatible
            `transformers` tokenizer.

        """
        self.model = model
        # self.mlx_tokenizer is used by mlx-lm in its generate function
        self.mlx_tokenizer = tokenizer
        # self.tokenizer is used by the logits processor
        self.tokenizer = TransformerTokenizer(tokenizer._tokenizer)
        self.type_adapter = MLXLMTypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> str:
        """Generate text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        from mlx_lm import generate

        return generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> Iterator[str]:
        """Stream text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        from mlx_lm import stream_generate

        for gen_response in stream_generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        ):
            yield gen_response.text

__init__(model, tokenizer)

Parameters:

Name Type Description Default
model Module

An instance of an mlx_lm model.

required
tokenizer PreTrainedTokenizer

An instance of an mlx_lm tokenizer or of a compatible transformers tokenizer.

required
Source code in outlines/models/mlxlm.py
def __init__(
    self,
    model: "nn.Module",
    tokenizer: "PreTrainedTokenizer",
):
    """
    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        `transformers` tokenizer.

    """
    self.model = model
    # self.mlx_tokenizer is used by mlx-lm in its generate function
    self.mlx_tokenizer = tokenizer
    # self.tokenizer is used by the logits processor
    self.tokenizer = TransformerTokenizer(tokenizer._tokenizer)
    self.type_adapter = MLXLMTypeAdapter()

generate(model_input, output_type=None, **kwargs)

Generate text using mlx-lm.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
kwargs

Additional keyword arguments to pass to the mlx-lm library.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/mlxlm.py
def generate(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> str:
    """Generate text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    from mlx_lm import generate

    return generate(
        self.model,
        self.mlx_tokenizer,
        self.type_adapter.format_input(model_input),
        logits_processors=self.type_adapter.format_output_type(output_type),
        **kwargs,
    )

generate_stream(model_input, output_type=None, **kwargs)

Stream text using mlx-lm.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
kwargs

Additional keyword arguments to pass to the mlx-lm library.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/mlxlm.py
def generate_stream(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> Iterator[str]:
    """Stream text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    from mlx_lm import stream_generate

    for gen_response in stream_generate(
        self.model,
        self.mlx_tokenizer,
        self.type_adapter.format_input(model_input),
        logits_processors=self.type_adapter.format_output_type(output_type),
        **kwargs,
    ):
        yield gen_response.text

MLXLMTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the MLXLM model.

Source code in outlines/models/mlxlm.py
class MLXLMTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `MLXLM` model."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {input} is not available. "
            "The `mlx_lm` library does not support batch inference."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str):
        return model_input

    def format_output_type(
        self, output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional[List[OutlinesLogitsProcessor]]:
        """Generate the logits processor argument to pass to the model.

        Parameters
        ----------
        output_type
            The logits processor provided.

        Returns
        -------
        Optional[list[OutlinesLogitsProcessor]]
            The logits processor argument to be passed to the model.

        """
        if not output_type:
            return None
        return [output_type]

format_input(model_input)

Generate the prompt argument to pass to the model.

Parameters:

Name Type Description Default
model_input

The input provided by the user.

required

Returns:

Type Description
str

The formatted input to be passed to the model.

Source code in outlines/models/mlxlm.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {input} is not available. "
        "The `mlx_lm` library does not support batch inference."
    )

format_output_type(output_type=None)

Generate the logits processor argument to pass to the model.

Parameters:

Name Type Description Default
output_type Optional[OutlinesLogitsProcessor]

The logits processor provided.

None

Returns:

Type Description
Optional[list[OutlinesLogitsProcessor]]

The logits processor argument to be passed to the model.

Source code in outlines/models/mlxlm.py
def format_output_type(
    self, output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional[List[OutlinesLogitsProcessor]]:
    """Generate the logits processor argument to pass to the model.

    Parameters
    ----------
    output_type
        The logits processor provided.

    Returns
    -------
    Optional[list[OutlinesLogitsProcessor]]
        The logits processor argument to be passed to the model.

    """
    if not output_type:
        return None
    return [output_type]

from_mlxlm(model, tokenizer)

Create an Outlines MLXLM model instance from an mlx_lm model and a tokenizer.

Parameters:

Name Type Description Default
model Module

An instance of an mlx_lm model.

required
tokenizer PreTrainedTokenizer

An instance of an mlx_lm tokenizer or of a compatible transformers tokenizer.

required

Returns:

Type Description
MLXLM

An Outlines MLXLM model instance.

Source code in outlines/models/mlxlm.py
def from_mlxlm(model: "nn.Module", tokenizer: "PreTrainedTokenizer") -> MLXLM:
    """Create an Outlines `MLXLM` model instance from an `mlx_lm` model and a
    tokenizer.

    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        transformers tokenizer.

    Returns
    -------
    MLXLM
        An Outlines `MLXLM` model instance.

    """
    return MLXLM(model, tokenizer)
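
Example usage (a hedged sketch; the Hugging Face repository name is an example and the sampling arguments depend on the installed `mlx-lm` version):

from mlx_lm import load

from outlines import from_mlxlm  # assumed to be re-exported at the package root

# `load` returns the model module and a tokenizer wrapper expected by `from_mlxlm`
mlx_model, mlx_tokenizer = load("mlx-community/Llama-3.2-1B-Instruct-4bit")  # example repository
model = from_mlxlm(mlx_model, mlx_tokenizer)

print(model.generate("Write a haiku about autumn.", max_tokens=64))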

ollama

Integration with the ollama library.

Ollama

Bases: Model

Thin wrapper around the ollama.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.Client client.

Source code in outlines/models/ollama.py
class Ollama(Model):
    """Thin wrapper around the `ollama.Client` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `ollama.Client` client.

    """

    def __init__(
        self, client: "OllamaClient", model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `ollama.Client` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OllamaTypeAdapter()

    def generate(self,
        model_input: str,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        response = self.client.generate(
            prompt=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )
        return response.response

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        response = self.client.generate(
            prompt=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            stream=True,
            **kwargs,
        )
        for chunk in response:
            yield chunk.response

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Client

The ollama.Client client.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/ollama.py
def __init__(
    self, client: "OllamaClient", model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `ollama.Client` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OllamaTypeAdapter()

generate(model_input, output_type=None, **kwargs)

Generate text using Ollama.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/ollama.py
def generate(self,
    model_input: str,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    response = self.client.generate(
        prompt=self.type_adapter.format_input(model_input),
        format=self.type_adapter.format_output_type(output_type),
        **kwargs,
    )
    return response.response

generate_stream(model_input, output_type=None, **kwargs)

Stream text using Ollama.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py
def generate_stream(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> Iterator[str]:
    """Stream text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    response = self.client.generate(
        prompt=self.type_adapter.format_input(model_input),
        format=self.type_adapter.format_output_type(output_type),
        stream=True,
        **kwargs,
    )
    for chunk in response:
        yield chunk.response

OllamaTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Ollama model.

Source code in outlines/models/ollama.py
class OllamaTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Ollama` model."""

    def format_input(self, model_input: str) -> str:
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        if isinstance(model_input, str):
            return model_input
        raise TypeError(
            f"The input type {model_input} is not available. "
            "Ollama does not support batch inference."
        )

    def format_output_type(self, output_type: Optional[Any] = None) -> Optional[str]:
        """Format the output type to pass to the client.

        TODO: `int`, `float` and other Python types could be supported via
        JSON Schema.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        Optional[str]
            The formatted output type to be passed to the model.

        """
        if isinstance(output_type, Regex):
            raise TypeError(
                "Regex-based structured outputs are not supported by Ollama. "
                "Use an open source model in the meantime."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not supported by Ollama. "
                "Use an open source model in the meantime."
            )

        if output_type is None:
            return None
        elif isinstance(output_type, JsonSchema):
            return json.loads(output_type.schema)
        elif is_dataclass(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return schema
        elif is_typed_dict(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return schema
        elif is_pydantic_model(output_type):
            schema = output_type.model_json_schema()
            return schema
        elif is_genson_schema_builder(output_type):
            return output_type.to_json()
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by Ollama. "
                "Consider using a local model instead."
            )
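
The conversion performed by `format_output_type` can be illustrated as follows (a hedged sketch, not part of the source):

from pydantic import BaseModel

from outlines.models.ollama import OllamaTypeAdapter


class Character(BaseModel):
    name: str
    age: int


adapter = OllamaTypeAdapter()

# Pydantic models become the JSON schema dict passed to the client's `format` argument
schema = adapter.format_output_type(Character)
print(sorted(schema["properties"]))  # ['age', 'name']

# Without an output type, no format constraint is passed to the client
assert adapter.format_output_type(None) is None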

format_input(model_input)

Generate the prompt argument to pass to the model.

Parameters:

Name Type Description Default
model_input str

The input provided by the user.

required

Returns:

Type Description
str

The formatted input to be passed to the model.

Source code in outlines/models/ollama.py
def format_input(self, model_input: str) -> str:
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    if isinstance(model_input, str):
        return model_input
    raise TypeError(
        f"The input type {model_input} is not available. "
        "Ollama does not support batch inference."
    )

format_output_type(output_type=None)

Format the output type to pass to the client.

TODO: int, float and other Python types could be supported via JSON Schema.

Parameters:

Name Type Description Default
output_type Optional[Any]

The output type provided by the user.

None

Returns:

Type Description
Optional[str]

The formatted output type to be passed to the model.

Source code in outlines/models/ollama.py
def format_output_type(self, output_type: Optional[Any] = None) -> Optional[str]:
    """Format the output type to pass to the client.

    TODO: `int`, `float` and other Python types could be supported via
    JSON Schema.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    Optional[str]
        The formatted output type to be passed to the model.

    """
    if isinstance(output_type, Regex):
        raise TypeError(
            "Regex-based structured outputs are not supported by Ollama. "
            "Use an open source model in the meantime."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not supported by Ollama. "
            "Use an open source model in the meantime."
        )

    if output_type is None:
        return None
    elif isinstance(output_type, JsonSchema):
        return json.loads(output_type.schema)
    elif is_dataclass(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return schema
    elif is_typed_dict(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return schema
    elif is_pydantic_model(output_type):
        schema = output_type.model_json_schema()
        return schema
    elif is_genson_schema_builder(output_type):
        return output_type.to_json()
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by Ollama. "
            "Consider using a local model instead."
        )

from_ollama(client, model_name=None)

Create an Outlines Ollama model instance from an ollama.Client client.

Parameters:

Name Type Description Default
client Client

An ollama.Client client instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Ollama

An Outlines Ollama model instance.

Source code in outlines/models/ollama.py
def from_ollama(
    client: "OllamaClient", model_name: Optional[str] = None
) -> Ollama:
    """Create an Outlines `Ollama` model instance from an `ollama.Client`
    client.

    Parameters
    ----------
    client
        An `ollama.Client` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Ollama
        An Outlines `Ollama` model instance.

    """
    return Ollama(client, model_name)
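
Example usage (a hedged sketch; the model name is an example and must already be available locally, e.g. via `ollama pull`):

import ollama
from pydantic import BaseModel

from outlines import from_ollama  # assumed to be re-exported at the package root


class Character(BaseModel):
    name: str
    age: int


client = ollama.Client()
model = from_ollama(client, "llama3.1")  # example model name

json_string = model.generate("Create a character.", output_type=Character)
print(Character.model_validate_json(json_string))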

openai

Integration with OpenAI's API.

OpenAI

Bases: Model

Thin wrapper around the openai.OpenAI client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client.

Source code in outlines/models/openai.py
class OpenAI(Model):
    """Thin wrapper around the `openai.OpenAI` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client.

    """

    def __init__(
        self,
        client: Union["OpenAIClient", "AzureOpenAIClient"],
        model_name: Optional[Union[str, "OpenAIConfig"]] = None,
        **kwargs
    ):
        """Initialize the OpenAI model.

        To provide temporary backwards compatibility with Outlines v0,
        the class can be instantiated with a `OpenAIConfig` instance as
        a value for the `model_name` argument. This is deprecated and will
        be removed in v1.1.0. Please provide a model name instead.

        Parameters
        ----------
        client
            The `openai.OpenAI` client.
        model_name
            The name of the model to use.

        """

        # legacy mode
        if isinstance(model_name, OpenAIConfig) or kwargs.get("config"):
            warnings.warn("""
                The `openai` function is deprecated starting from v1.0.0.
                Do not use it. Support for it will be removed in v1.1.0.
                Instead, you should instantiate an `OpenAI` model with the
                `outlines.from_openai` function that takes an openai library
                client and a model name as arguments. Similarly, you cannot
                instantiate an `OpenAI` model directly with an `OpenAIConfig`
                instance anymore, but must provide a client and a model name
                instead.
                For example:
                ```python
                from openai import OpenAI as OpenAIClient
                from outlines import from_openai
                client = OpenAIClient()
                model = from_openai(client, "gpt-4o")
                ```
            """,
            DeprecationWarning,
            stacklevel=2,
            )
            config = (
                model_name
                if isinstance(model_name, OpenAIConfig)
                else kwargs.pop("config")
            )
            self.legacy_instance = OpenAILegacy(
                client, config, kwargs.get("system_prompt")
            )
        # regular mode
        else:
            self.client = client
            self.model_name = model_name
            self.type_adapter = OpenAITypeAdapter()

    def generate(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            result = self.client.chat.completions.create(
                **messages,
                **response_format,
                **inference_kwargs,
            )
        except openai.BadRequestError as e:
            if e.body["message"].startswith("Invalid schema"):
                raise TypeError(
                    f"OpenAI does not support your schema: {e.body['message']}. "
                    "Try a local model or dottxt instead."
                )
            else:
                raise e

        messages = [choice.message for choice in result.choices]
        for message in messages:
            if message.refusal is not None:
                raise ValueError(
                    f"OpenAI refused to answer the request: {message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_stream(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Stream text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        stream = self.client.chat.completions.create(
            stream=True,
            **messages,
            **response_format,
            **inference_kwargs
        )

        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    ### Legacy !!!

    def __call__(self, *args, **kwargs):
        if hasattr(self, "legacy_instance"):
            return self.legacy_instance(*args, **kwargs)
        else:
            return super().__call__(*args, **kwargs)

    def stream(self, *args, **kwargs):
        if hasattr(self, "legacy_instance"):
            return self.legacy_instance.stream(*args, **kwargs)
        else:
            return super().stream(*args, **kwargs)

    def new_with_replacements(self, **kwargs):
        if hasattr(self, "legacy_instance"):
            return self.legacy_instance.new_with_replacements(self, **kwargs)
        raise NotImplementedError("This method is only available in legacy mode")

    def __str__(self):
        if hasattr(self, "legacy_instance"):
            return str(self.legacy_instance)
        else:
            return super().__str__()

    def __repr__(self):
        if hasattr(self, "legacy_instance"):
            return repr(self.legacy_instance)
        else:
            return super().__repr__()

__init__(client, model_name=None, **kwargs)

Initialize the OpenAI model.

To provide temporary backwards compatibility with Outlines v0, the class can be instantiated with an OpenAIConfig instance as a value for the model_name argument. This is deprecated and will be removed in v1.1.0. Please provide a model name instead.

Parameters:

Name Type Description Default
client Union[OpenAI, AzureOpenAI]

The openai.OpenAI client.

required
model_name Optional[Union[str, OpenAIConfig]]

The name of the model to use.

None
Source code in outlines/models/openai.py
def __init__(
    self,
    client: Union["OpenAIClient", "AzureOpenAIClient"],
    model_name: Optional[Union[str, "OpenAIConfig"]] = None,
    **kwargs
):
    """Initialize the OpenAI model.

    To provide temporary backwards compatibility with Outlines v0,
    the class can be instantiated with an `OpenAIConfig` instance as
    a value for the `model_name` argument. This is deprecated and will
    be removed in v1.1.0. Please provide a model name instead.

    Parameters
    ----------
    client
        The `openai.OpenAI` client.
    model_name
        The name of the model to use.

    """

    # legacy mode
    if isinstance(model_name, OpenAIConfig) or kwargs.get("config"):
        warnings.warn("""
            The `openai` function is deprecated starting from v1.0.0.
            Do not use it. Support for it will be removed in v1.1.0.
            Instead, you should instantiate an `OpenAI` model with the
            `outlines.from_openai` function that takes an openai library
            client and a model name as arguments. Similarly, you cannot
            instantiate an `OpenAI` model directly with an `OpenAIConfig`
            instance anymore, but must provide a client and a model name
            instead.
            For example:
            ```python
            from openai import OpenAI as OpenAIClient
            from outlines import from_openai
            client = OpenAIClient()
            model = from_openai(client, "gpt-4o")
            ```
        """,
        DeprecationWarning,
        stacklevel=2,
        )
        config = (
            model_name
            if isinstance(model_name, OpenAIConfig)
            else kwargs.pop("config")
        )
        self.legacy_instance = OpenAILegacy(
            client, config, kwargs.get("system_prompt")
        )
    # regular mode
    else:
        self.client = client
        self.model_name = model_name
        self.type_adapter = OpenAITypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using OpenAI.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Union[type[BaseModel], str]]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/openai.py
def generate(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        result = self.client.chat.completions.create(
            **messages,
            **response_format,
            **inference_kwargs,
        )
    except openai.BadRequestError as e:
        if e.body["message"].startswith("Invalid schema"):
            raise TypeError(
                f"OpenAI does not support your schema: {e.body['message']}. "
                "Try a local model or dottxt instead."
            )
        else:
            raise e

    messages = [choice.message for choice in result.choices]
    for message in messages:
        if message.refusal is not None:
            raise ValueError(
                f"OpenAI refused to answer the request: {message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

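A hedged usage sketch for structured generation with `generate` (the model name and schema are illustrative; a valid `OPENAI_API_KEY` is assumed to be set in the environment):

```python
import openai
from pydantic import BaseModel

from outlines import from_openai


class Answer(BaseModel):
    city: str
    country: str


model = from_openai(openai.OpenAI(), "gpt-4o-mini")

# The Pydantic model is turned into a strict JSON schema and forwarded to the
# client through the `response_format` argument by `OpenAITypeAdapter`.
result = model.generate("Where is the Eiffel Tower?", Answer, temperature=0)
print(Answer.model_validate_json(result))
```
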
generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using OpenAI.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Union[type[BaseModel], str]]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.

None
**inference_kwargs

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/openai.py
def generate_stream(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Stream text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    stream = self.client.chat.completions.create(
        stream=True,
        **messages,
        **response_format,
        **inference_kwargs
    )

    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

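And a corresponding streaming sketch; with no output type the chunks are plain text (model name illustrative):

```python
import openai

from outlines import from_openai

model = from_openai(openai.OpenAI(), "gpt-4o-mini")

# `generate_stream` yields the `delta.content` of each chunk as it arrives.
for chunk in model.generate_stream("Write a haiku about the sea.", max_tokens=64):
    print(chunk, end="", flush=True)
```
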
OpenAIConfig dataclass

Represents the parameters of the OpenAI API.

The information was last fetched on 2023/11/20. We document below the properties that are specific to the OpenAI API. Not all these properties are supported by Outlines.

Parameters:

Name Type Description Default
model str

The name of the model. Available models can be found on OpenAI's website.

''
frequency_penalty float

Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text, decreasing the model's likelihood of repeating the same line verbatim.

0
logit_bias Dict[int, int]

Modifies the likelihood of specified tokens to appear in the completion. Number between -100 (forbid) and +100 (only allows).

dict()
n int

The number of completions to return for each prompt.

1
presence_penalty float

Similar to frequency penalty.

0
response_format Optional[Dict[str, str]]

Specifies the format the model must output. {"type": "json_object"} enables JSON mode.

None
seed Optional[int]

Two completions with the same seed value should return the same completion. This is however not guaranteed.

None
stop Optional[Union[str, List[str]]]

Up to 4 words where the API will stop the completion.

None
temperature float

Number between 0 and 2. Higher values make the output more random, while lower values make it more deterministic.

1.0
top_p int

Number between 0 and 1. Parameter for nucleus sampling.

1
user str

A unique identifier for the end-user.

str()
Source code in outlines/models/openai.py
@dataclass(frozen=True)
class OpenAIConfig:
    """Represents the parameters of the OpenAI API.

    The information was last fetched on 2023/11/20. We document below the
    properties that are specific to the OpenAI API. Not all these properties are
    supported by Outlines.

    Parameters
    ----------
    model
        The name of the model. Available models can be found on OpenAI's website.
    frequency_penalty
        Number between -2.0 and 2.0. Positive values penalize new tokens based
        on their existing frequency in the text, decreasing the model's
        likelihood of repeating the same line verbatim.
    logit_bias
        Modifies the likelihood of specified tokens to appear in the completion.
        Number between -100 (forbid) and +100 (only allows).
    n
        The number of completions to return for each prompt.
    presence_penalty
        Similar to frequency penalty.
    response_format
        Specifies the format the model must output. `{"type": "json_object"}`
        enables JSON mode.
    seed
        Two completions with the same `seed` value should return the same
        completion. This is however not guaranteed.
    stop
        Up to 4 words where the API will stop the completion.
    temperature
        Number between 0 and 2. Higher values make the output more random, while
        lower values make it more deterministic.
    top_p
        Number between 0 and 1. Parameter for nucleus sampling.
    user
        A unique identifier for the end-user.
    """

    model: str = ""
    frequency_penalty: float = 0
    logit_bias: Dict[int, int] = field(default_factory=dict)
    max_tokens: Optional[int] = None
    n: int = 1
    presence_penalty: float = 0
    response_format: Optional[Dict[str, str]] = None
    seed: Optional[int] = None
    stop: Optional[Union[str, List[str]]] = None
    temperature: float = 1.0
    top_p: int = 1
    user: str = field(default_factory=str)

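The dataclass is frozen, so variants are derived with `dataclasses.replace`; a small illustrative sketch, only relevant to the deprecated legacy path (field values are arbitrary):

```python
from dataclasses import replace

from outlines.models.openai import OpenAIConfig

config = OpenAIConfig(model="gpt-4o-mini", temperature=0.5, n=2)
# Frozen dataclass: derive a modified copy instead of mutating it.
deterministic = replace(config, temperature=0.0, seed=42)
```
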
OpenAILegacy

An object that represents the OpenAI API.

Source code in outlines/models/openai.py
class OpenAILegacy():
    """An object that represents the OpenAI API."""

    def __init__(
        self,
        client,
        config,
        system_prompt: Optional[str] = None,
    ):
        """Create an `OpenAI` instance.

        This class supports the standard OpenAI API, the Azure OpenAI API as
        well as compatible APIs that rely on the OpenAI client.

        Parameters
        ----------
        client
            An instance of the API's async client.
        config
            An instance of `OpenAIConfig`. Can be useful to specify some
            parameters that cannot be set by calling this class' methods.
        """

        self.client = client
        self.config = config
        self.system_prompt = system_prompt

        # We count the total number of prompt and generated tokens as returned
        # by the OpenAI API, summed over all the requests performed with this
        # model instance.
        self.prompt_tokens = 0
        self.completion_tokens = 0

        self.format_sequence = lambda x: x

    def __call__(
        self,
        prompt: Union[str, List[str]],
        max_tokens: Optional[int] = None,
        stop_at: Optional[Union[List[str], str]] = None,
        *,
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        samples: Optional[int] = None,
    ):
        """Call the OpenAI API to generate text.

        Parameters
        ----------
        prompt
            A string or list of strings that will be used to prompt the model
        max_tokens
            The maximum number of tokens to generate
        stop_at
            A string or a list of up to 4 strings; the generation stops when
            any of them is generated.
        system_prompt
            The content of the system message that precedes the user's prompt.
        temperature
            The value of the temperature used to sample tokens
        samples
            The number of completions to generate for each prompt

        """
        if max_tokens is None:
            max_tokens = self.config.max_tokens
        if stop_at is None:
            stop_at = self.config.stop
        if temperature is None:
            temperature = self.config.temperature
        if samples is None:
            samples = self.config.n

        config = replace(
            self.config,
            max_tokens=max_tokens,
            temperature=temperature,
            n=samples,
            stop=stop_at,
        )  # type: ignore

        response, prompt_tokens, completion_tokens = generate_chat(
            prompt, system_prompt or self.system_prompt, self.client, config
        )
        self.prompt_tokens += prompt_tokens
        self.completion_tokens += completion_tokens

        return self.format_sequence(response)

    def stream(self, *args, **kwargs):
        raise NotImplementedError(
            "Streaming is currently not supported for the OpenAI API"
        )

    def new_with_replacements(self, model, **kwargs):
        new_instance = copy.copy(model)
        new_instance.legacy_instance.config = replace(
            new_instance.legacy_instance.config, **kwargs
        )
        return new_instance

    def __str__(self):
        return self.__class__.__name__ + " API"

    def __repr__(self):
        return str(self.config)

__call__(prompt, max_tokens=None, stop_at=None, *, system_prompt=None, temperature=None, samples=None)

Call the OpenAI API to generate text.

Parameters:

Name Type Description Default
prompt Union[str, List[str]]

A string or list of strings that will be used to prompt the model

required
max_tokens Optional[int]

The maximum number of tokens to generate

None
stop_at Optional[Union[List[str], str]]

A string or a list of up to 4 strings; the generation stops when any of them is generated.

None
system_prompt Optional[str]

The content of the system message that precedes the user's prompt.

None
temperature Optional[float]

The value of the temperature used to sample tokens

None
samples Optional[int]

The number of completions to generate for each prompt

None
Source code in outlines/models/openai.py
def __call__(
    self,
    prompt: Union[str, List[str]],
    max_tokens: Optional[int] = None,
    stop_at: Optional[Union[List[str], str]] = None,
    *,
    system_prompt: Optional[str] = None,
    temperature: Optional[float] = None,
    samples: Optional[int] = None,
):
    """Call the OpenAI API to generate text.

    Parameters
    ----------
    prompt
        A string or list of strings that will be used to prompt the model
    max_tokens
        The maximum number of tokens to generate
    stop_at
        A string or a list of up to 4 strings; the generation stops when
        any of them is generated.
    system_prompt
        The content of the system message that precedes the user's prompt.
    temperature
        The value of the temperature used to sample tokens
    samples
        The number of completions to generate for each prompt

    """
    if max_tokens is None:
        max_tokens = self.config.max_tokens
    if stop_at is None:
        stop_at = self.config.stop
    if temperature is None:
        temperature = self.config.temperature
    if samples is None:
        samples = self.config.n

    config = replace(
        self.config,
        max_tokens=max_tokens,
        temperature=temperature,
        n=samples,
        stop=stop_at,
    )  # type: ignore

    response, prompt_tokens, completion_tokens = generate_chat(
        prompt, system_prompt or self.system_prompt, self.client, config
    )
    self.prompt_tokens += prompt_tokens
    self.completion_tokens += completion_tokens

    return self.format_sequence(response)

__init__(client, config, system_prompt=None)

Create an OpenAI instance.

This class supports the standard OpenAI API, the Azure OpenAI API as well as compatible APIs that rely on the OpenAI client.

Parameters:

Name Type Description Default
client

An instance of the API's async client.

required
config

An instance of OpenAIConfig. Can be useful to specify some parameters that cannot be set by calling this class' methods.

required
Source code in outlines/models/openai.py
def __init__(
    self,
    client,
    config,
    system_prompt: Optional[str] = None,
):
    """Create an `OpenAI` instance.

    This class supports the standard OpenAI API, the Azure OpenAI API as
    well as compatible APIs that rely on the OpenAI client.

    Parameters
    ----------
    client
        An instance of the API's async client.
    config
        An instance of `OpenAIConfig`. Can be useful to specify some
        parameters that cannot be set by calling this class' methods.
    """

    self.client = client
    self.config = config
    self.system_prompt = system_prompt

    # We count the total number of prompt and generated tokens as returned
    # by the OpenAI API, summed over all the requests performed with this
    # model instance.
    self.prompt_tokens = 0
    self.completion_tokens = 0

    self.format_sequence = lambda x: x

OpenAITypeAdapter

Bases: ModelTypeAdapter

Type adapter for the OpenAI model.

OpenAITypeAdapter is responsible for preparing the arguments to OpenAI's completions.create methods: the input (prompt and possibly image), as well as the output type (only JSON).

Source code in outlines/models/openai.py
class OpenAITypeAdapter(ModelTypeAdapter):
    """Type adapter for the `OpenAI` model.

    `OpenAITypeAdapter` is responsible for preparing the arguments to OpenAI's
    `completions.create` methods: the input (prompt and possibly image), as
    well as the output type (only JSON).

    """

    def format_input(self, model_input: Union[str, Vision]) -> dict:
        """Generate the `messages` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        dict
            The formatted input to be passed to the client.

        """
        if isinstance(model_input, str):
            return self.format_str_model_input(model_input)
        elif isinstance(model_input, Vision):
            return self.format_vision_model_input(model_input)
        raise TypeError(
            f"The input type {input} is not available with OpenAI. "
            "The only available types are `str` and `Vision`."
        )

    def format_str_model_input(self, model_input: str) -> dict:
        """Generate the `messages` argument to pass to the client when the user
        only passes a prompt.

        """
        return {
            "messages": [
                {
                    "role": "user",
                    "content": model_input,
                }
            ]
        }

    def format_vision_model_input(self, model_input: Vision) -> dict:
        """Generate the `messages` argument to pass to the client when the user
        passes a prompt and an image.

        """
        return {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": model_input.prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{model_input.image_format};base64,{model_input.image_str}"  # noqa: E702
                            },
                        },
                    ],
                }
            ]
        }

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the `response_format` argument to the client based on the
        output type specified by the user.

        TODO: `int`, `float` and other Python types could be supported via
        JSON Schema.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        dict
            The formatted output type to be passed to the client.

        """
        # Unsupported languages
        if isinstance(output_type, Regex):
            raise TypeError(
                "Neither regex-based structured outputs nor the `pattern` keyword "
                "in Json Schema are available with OpenAI. Use an open source "
                "model or dottxt instead."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not available with OpenAI. "
                "Use an open source model or dottxt instead."
            )

        if output_type is None:
            return {}
        elif is_native_dict(output_type):
            return self.format_json_mode_type()
        elif is_dataclass(output_type):
            output_type = TypeAdapter(output_type).json_schema()
            return self.format_json_output_type(output_type)
        elif is_typed_dict(output_type):
            output_type = TypeAdapter(output_type).json_schema()
            return self.format_json_output_type(output_type)
        elif is_pydantic_model(output_type):
            output_type = output_type.model_json_schema()
            return self.format_json_output_type(output_type)
        elif is_genson_schema_builder(output_type):
            schema = json.loads(output_type.to_json())
            return self.format_json_output_type(schema)
        elif isinstance(output_type, JsonSchema):
            return self.format_json_output_type(json.loads(output_type.schema))
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not available with OpenAI. "
                "Use an open source model or dottxt instead."
            )

    def format_json_output_type(self, schema: dict) -> dict:
        """Generate the `response_format` argument to the client when the user
        specified a `Json` output type.

        """
        # OpenAI requires `additionalProperties` to be set
        if "additionalProperties" not in schema:
            schema["additionalProperties"] = False

        return {
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "default",
                    "strict": True,
                    "schema": schema,
                },
            }
        }

    def format_json_mode_type(self) -> dict:
        """Generate the `response_format` argument to the client when the user
        specified the output type should be a JSON but without specifying the
        schema (also called "JSON mode").

        """
        return {"response_format": {"type": "json_object"}}

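A quick sketch of the payloads the adapter builds; it performs no API call, so the snippet only prints dictionaries (the schema is illustrative, and the bare `dict` case assumes `is_native_dict` matches the built-in `dict`, as its name suggests):

```python
from pydantic import BaseModel

from outlines.models.openai import OpenAITypeAdapter


class Item(BaseModel):
    name: str
    price: float


adapter = OpenAITypeAdapter()

# `messages` payload for a plain string prompt.
print(adapter.format_input("List one item."))

# Strict JSON-schema `response_format` derived from the Pydantic model.
print(adapter.format_output_type(Item))

# Bare `dict` requests plain "JSON mode" without a schema.
print(adapter.format_output_type(dict))
```
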
format_input(model_input)

Generate the messages argument to pass to the client.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The input provided by the user.

required

Returns:

Type Description
dict

The formatted input to be passed to the client.

Source code in outlines/models/openai.py
def format_input(self, model_input: Union[str, Vision]) -> dict:
    """Generate the `messages` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    dict
        The formatted input to be passed to the client.

    """
    if isinstance(model_input, str):
        return self.format_str_model_input(model_input)
    elif isinstance(model_input, Vision):
        return self.format_vision_model_input(model_input)
    raise TypeError(
        f"The input type {input} is not available with OpenAI. "
        "The only available types are `str` and `Vision`."
    )

format_json_mode_type()

Generate the response_format argument to the client when the user specified the output type should be a JSON but without specifying the schema (also called "JSON mode").

Source code in outlines/models/openai.py
def format_json_mode_type(self) -> dict:
    """Generate the `response_format` argument to the client when the user
    specified the output type should be a JSON but without specifying the
    schema (also called "JSON mode").

    """
    return {"response_format": {"type": "json_object"}}

format_json_output_type(schema)

Generate the response_format argument to the client when the user specified a Json output type.

Source code in outlines/models/openai.py
def format_json_output_type(self, schema: dict) -> dict:
    """Generate the `response_format` argument to the client when the user
    specified a `Json` output type.

    """
    # OpenAI requires `additionalProperties` to be set
    if "additionalProperties" not in schema:
        schema["additionalProperties"] = False

    return {
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "name": "default",
                "strict": True,
                "schema": schema,
            },
        }
    }

format_output_type(output_type=None)

Generate the response_format argument to the client based on the output type specified by the user.

TODO: int, float and other Python types could be supported via JSON Schema.

Parameters:

Name Type Description Default
output_type Optional[Any]

The output type provided by the user.

None

Returns:

Type Description
dict

The formatted output type to be passed to the client.

Source code in outlines/models/openai.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the `response_format` argument to the client based on the
    output type specified by the user.

    TODO: `int`, `float` and other Python types could be supported via
    JSON Schema.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    dict
        The formatted output type to be passed to the client.

    """
    # Unsupported languages
    if isinstance(output_type, Regex):
        raise TypeError(
            "Neither regex-based structured outputs nor the `pattern` keyword "
            "in Json Schema are available with OpenAI. Use an open source "
            "model or dottxt instead."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not available with OpenAI. "
            "Use an open source model or dottxt instead."
        )

    if output_type is None:
        return {}
    elif is_native_dict(output_type):
        return self.format_json_mode_type()
    elif is_dataclass(output_type):
        output_type = TypeAdapter(output_type).json_schema()
        return self.format_json_output_type(output_type)
    elif is_typed_dict(output_type):
        output_type = TypeAdapter(output_type).json_schema()
        return self.format_json_output_type(output_type)
    elif is_pydantic_model(output_type):
        output_type = output_type.model_json_schema()
        return self.format_json_output_type(output_type)
    elif is_genson_schema_builder(output_type):
        schema = json.loads(output_type.to_json())
        return self.format_json_output_type(schema)
    elif isinstance(output_type, JsonSchema):
        return self.format_json_output_type(json.loads(output_type.schema))
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not available with OpenAI. "
            "Use an open source model or dottxt instead."
        )

format_str_model_input(model_input)

Generate the messages argument to pass to the client when the user only passes a prompt.

Source code in outlines/models/openai.py
def format_str_model_input(self, model_input: str) -> dict:
    """Generate the `messages` argument to pass to the client when the user
    only passes a prompt.

    """
    return {
        "messages": [
            {
                "role": "user",
                "content": model_input,
            }
        ]
    }

format_vision_model_input(model_input)

Generate the messages argument to pass to the client when the user passes a prompt and an image.

Source code in outlines/models/openai.py
def format_vision_model_input(self, model_input: Vision) -> dict:
    """Generate the `messages` argument to pass to the client when the user
    passes a prompt and an image.

    """
    return {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": model_input.prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{model_input.image_format};base64,{model_input.image_str}"  # noqa: E702
                        },
                    },
                ],
            }
        ]
    }

error_handler(api_call_fn)

Handle OpenAI API errors and missing API key.

Source code in outlines/models/openai.py
def error_handler(api_call_fn: Callable) -> Callable:
    """Handle OpenAI API errors and missing API key."""

    def call(*args, **kwargs):
        import openai

        try:
            return api_call_fn(*args, **kwargs)
        except (
            openai.APITimeoutError,
            openai.InternalServerError,
            openai.RateLimitError,
        ) as e:
            raise OSError(f"Could not connect to the OpenAI API: {e}")
        except (
            openai.AuthenticationError,
            openai.BadRequestError,
            openai.ConflictError,
            openai.PermissionDeniedError,
            openai.NotFoundError,
            openai.UnprocessableEntityError,
        ) as e:
            raise e

    return call

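A minimal sketch of the intended use; the wrapped function is hypothetical, and only the connection-level errors listed above are converted to `OSError`:

```python
from outlines.models.openai import error_handler


@error_handler
def list_models(client):
    # Any OpenAI API call can be wrapped; timeouts, rate limits and internal
    # server errors are re-raised as OSError by the decorator.
    return client.models.list()
```
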
from_openai(client, model_name=None)

Create an Outlines OpenAI model instance from an openai.OpenAI client.

Parameters:

Name Type Description Default
client Union[OpenAI, AzureOpenAI]

An openai.OpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
OpenAI

An Outlines OpenAI model instance.

Source code in outlines/models/openai.py
def from_openai(
    client: Union["OpenAIClient", "AzureOpenAIClient"],
    model_name: Optional[str] = None,
) -> OpenAI:
    """Create an Outlines `OpenAI` model instance from an `openai.OpenAI`
    client.

    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    OpenAI
        An Outlines `OpenAI` model instance.

    """
    return OpenAI(client, model_name)

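Since `from_openai` also accepts an `AzureOpenAI` client, an Azure-flavoured sketch (the endpoint, API version and deployment name are placeholders; the API key is read from the environment):

```python
from openai import AzureOpenAI

from outlines import from_openai

client = AzureOpenAI(
    azure_endpoint="https://my-resource.openai.azure.com",
    api_version="2024-08-01-preview",
)
# With Azure, the "model" passed to the client is the deployment name.
model = from_openai(client, "my-gpt-4o-deployment")
```
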
generate_chat(prompt, system_prompt, client, config) async

Call OpenAI's Chat Completion API.

Parameters:

Name Type Description Default
prompt str

The prompt we use to start the generation. Passed to the model with the "user" role.

required
system_prompt Union[str, None]

The system prompt, passed to the model with the "system" role before the prompt.

required
client

The API client

required
config OpenAIConfig

An OpenAIConfig instance.

required

Returns:

Type Description
A tuple that contains the model's response(s) and usage statistics.
Source code in outlines/models/openai.py
@functools.partial(vectorize, signature="(),(),(),()->(s),(),()")
async def generate_chat(
    prompt: str,
    system_prompt: Union[str, None],
    client,
    config: OpenAIConfig,
):
    """Call OpenAI's Chat Completion API.

    Parameters
    ----------
    prompt
        The prompt we use to start the generation. Passed to the model
        with the "user" role.
    system_prompt
        The system prompt, passed to the model with the "system" role
        before the prompt.
    client
        The API client
    config
        An `OpenAIConfig` instance.

    Returns
    -------
    A tuple that contains the model's response(s) and usage statistics.

    """
    import numpy as np

    @error_handler
    @cache()
    async def call_api(prompt, system_prompt, config):
        responses = await client.chat.completions.create(
            messages=system_message + user_message,
            **asdict(config),  # type: ignore
        )
        return responses.model_dump()

    system_message = (
        [{"role": "system", "content": system_prompt}] if system_prompt else []
    )
    user_message = [{"role": "user", "content": prompt}]

    responses = await call_api(prompt, system_prompt, config)

    results = np.array(
        [responses["choices"][i]["message"]["content"] for i in range(config.n)]
    )
    usage = responses["usage"]

    return results, usage["prompt_tokens"], usage["completion_tokens"]

sglang

Integration with an SGLang server.

AsyncSGLang

Bases: AsyncModel

Thin async wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py
class AsyncSGLang(AsyncModel):
    """Thin async wrapper around the `openai.OpenAI` client used to communicate
    with an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    async def generate(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using `sglang`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        response = await self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The sglang server refused to answer the request: "
                    f"{message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_stream( # type: ignore
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Return a text generator.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )

        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            **messages,
            **inference_kwargs,
        }

        return client_args

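A hedged sketch of asynchronous usage; the server URL, API key and model name are placeholders for a locally running SGLang server exposed through its OpenAI-compatible endpoint:

```python
import asyncio

from openai import AsyncOpenAI

from outlines.models.sglang import AsyncSGLang


async def main():
    client = AsyncOpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")
    model = AsyncSGLang(client, "default")

    # Plain generation; structured output types are accepted as well.
    print(await model.generate("Give me a one-line joke.", max_tokens=64))

    # Streaming.
    async for chunk in model.generate_stream("Count to five."):
        print(chunk, end="")


asyncio.run(main())
```
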
__init__(client, model_name=None)

Parameters:

Name Type Description Default
client

An openai.AsyncOpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None

Source code in outlines/models/sglang.py
def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = SGLangTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using sglang.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/sglang.py
async def generate(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using `sglang`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response = await self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The sglang server refused to answer the request: "
                f"{message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs) async

Return a text generator.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py
async def generate_stream( # type: ignore
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Return a text generator.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = await self.client.chat.completions.create(
        **client_args,
        stream=True,
    )

    async for chunk in stream:  # pragma: no cover
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

SGLang

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py
class SGLang(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    def generate(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using SGLang.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        response = self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The SGLang server refused to answer the request: "
                    f"{message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_stream(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using SGLang.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = self.client.chat.completions.create(
            **client_args, stream=True,
        )

        for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            **messages,
            **inference_kwargs,
        }

        return client_args

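The synchronous counterpart, sketched under the same assumptions (placeholder URL, API key and model name):

```python
from openai import OpenAI as OpenAIClient
from pydantic import BaseModel

from outlines.models.sglang import SGLang


class Person(BaseModel):
    name: str
    age: int


client = OpenAIClient(base_url="http://localhost:30000/v1", api_key="EMPTY")
model = SGLang(client, "default")

# JSON-schema constrained generation, enforced server-side by SGLang.
print(model.generate("Invent a person as JSON.", Person, max_tokens=128))
```
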
__init__(client, model_name=None)

Parameters:

Name Type Description Default
client

An openai.OpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/sglang.py
def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = SGLangTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using SGLang.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/sglang.py
def generate(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using SGLang.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    response = self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The SGLang server refused to answer the request: "
                f"{message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using SGLang.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py
def generate_stream(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using SGLang.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = self.client.chat.completions.create(
        **client_args, stream=True,
    )

    for chunk in stream:  # pragma: no cover
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content
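
Illustrative streaming usage (not part of the documented source); it assumes model is an Outlines SGLang instance connected to a running SGLang server, for example one built with from_sglang as documented further below.

for chunk in model.generate_stream("Tell me a short story.", max_tokens=128):
    print(chunk, end="", flush=True)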

SGLangTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the SGLang and AsyncSGLang models.

Source code in outlines/models/sglang.py
class SGLangTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `SGLang` and `AsyncSGLang` models."""

    def format_input(self, model_input: Union[str, Vision]) -> dict:
        """Generate the prompt argument to pass to the client.

        We rely on the OpenAITypeAdapter to format the input as the sglang
        server expects input in the same format as OpenAI.

        Parameters
        ----------
        model_input
            The input passed by the user.

        Returns
        -------
        dict
            The formatted input to be passed to the client.

        """
        return OpenAITypeAdapter().format_input(model_input)

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the client.

        Parameters
        ----------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The formatted output type to be passed to the client.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            warnings.warn(
                "SGLang's grammar-based structured output expects an EBNF "
                "grammar instead of the Lark grammar generally used in "
                "Outlines. The grammar cannot be used as a structured output "
                "type with an outlines backend; it is only compatible with "
                "the sglang and llguidance backends."
            )
            return {"extra_body": {"ebnf": term.definition}}
        elif isinstance(term, JsonSchema):
            return OpenAITypeAdapter().format_json_output_type(
                json.loads(term.schema)
            )
        else:
            return {"extra_body": {"regex": to_regex(term)}}

format_input(model_input)

Generate the prompt argument to pass to the client.

We rely on the OpenAITypeAdapter to format the input as the sglang server expects input in the same format as OpenAI.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The input passed by the user.

required

Returns:

Type Description
dict

The formatted input to be passed to the client.

Source code in outlines/models/sglang.py
def format_input(self, model_input: Union[str, Vision]) -> dict:
    """Generate the prompt argument to pass to the client.

    We rely on the OpenAITypeAdapter to format the input as the sglang
    server expects input in the same format as OpenAI.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    dict
        The formatted input to be passed to the client.

    """
    return OpenAITypeAdapter().format_input(model_input)

format_output_type(output_type=None)

Generate the structured output argument to pass to the client.

Parameters:

Name Type Description Default
output_type Optional[Any]

The structured output type provided.

None

Returns:

Type Description
dict

The formatted output type to be passed to the client.

Source code in outlines/models/sglang.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the client.

    Parameters
    ----------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The formatted output type to be passed to the client.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        warnings.warn(
            "SGLang's grammar-based structured output expects an EBNF "
            "grammar instead of the Lark grammar generally used in "
            "Outlines. The grammar cannot be used as a structured output "
            "type with an outlines backend; it is only compatible with "
            "the sglang and llguidance backends."
        )
        return {"extra_body": {"ebnf": term.definition}}
    elif isinstance(term, JsonSchema):
        return OpenAITypeAdapter().format_json_output_type(
            json.loads(term.schema)
        )
    else:
        return {"extra_body": {"regex": to_regex(term)}}
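
The sketch below (not from the documented source) illustrates the kind of dictionaries the adapter produces; the exact JSON schema payload and regex pattern depend on Outlines' type-to-term conversion, so the printed values are only indicative.

from typing import Literal

from pydantic import BaseModel

from outlines.models.sglang import SGLangTypeAdapter

adapter = SGLangTypeAdapter()

class Character(BaseModel):
    name: str
    age: int

# Pydantic models are converted to a JSON schema and routed through the
# OpenAI-style response format arguments.
print(adapter.format_output_type(Character))

# Most other types are converted to a regex and passed through `extra_body`,
# roughly {"extra_body": {"regex": "..."}}.
print(adapter.format_output_type(Literal["yes", "no"]))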

from_sglang(client, model_name=None)

Create a SGLang or AsyncSGLang instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name Type Description Default
client Union[OpenAI, AsyncOpenAI]

An openai.OpenAI or openai.AsyncOpenAI instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Union[SGLang, AsyncSGLang]

An Outlines SGLang or AsyncSGLang model instance.

Source code in outlines/models/sglang.py
def from_sglang(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[SGLang, AsyncSGLang]:
    """Create a `SGLang` or `AsyncSGLang` instance from an `openai.OpenAI` or
    `openai.AsyncOpenAI` instance.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[SGLang, AsyncSGLang]
        An Outlines `SGLang` or `AsyncSGLang` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    if isinstance(client, OpenAI):
        return SGLang(client, model_name)
    elif isinstance(client, AsyncOpenAI):
        return AsyncSGLang(client, model_name)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            "Please provide an OpenAI or AsyncOpenAI instance."
        )
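
End-to-end usage sketch with placeholder values: the base URL, API key and model name below stand in for a locally running SGLang server exposing an OpenAI-compatible endpoint.

from openai import OpenAI
from pydantic import BaseModel

from outlines.models.sglang import from_sglang

class Answer(BaseModel):
    reasoning: str
    result: int

client = OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")
model = from_sglang(client, "Qwen/Qwen2.5-7B-Instruct")

# The Pydantic model is turned into a JSON schema by SGLangTypeAdapter and
# forwarded to the server's structured generation backend.
text = model.generate("What is 2 + 2?", Answer, max_tokens=128)
print(text)  # a JSON string matching the Answer schema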

tgi

Integration with a TGI server.

AsyncTGI

Bases: AsyncModel

Thin async wrapper around a huggingface_hub.AsyncInferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.AsyncInferenceClient client.

Source code in outlines/models/tgi.py
class AsyncTGI(AsyncModel):
    """Thin async wrapper around a `huggingface_hub.AsyncInferenceClient`
    client used to communicate with a `TGI` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the
    `huggingface_hub.AsyncInferenceClient` client.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `AsyncInferenceClient` client instance.

        """
        self.client = client
        self.type_adapter = TGITypeAdapter()

    async def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        response = await self.client.text_generation(**client_args)

        return response

    async def generate_stream( # type: ignore
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = await self.client.text_generation(
            **client_args, stream=True
        )

        async for chunk in stream:  # pragma: no cover
            yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the TGI client."""
        prompt = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        client_args = {
            "prompt": prompt,
            **inference_kwargs,
        }

        return client_args
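
Hypothetical async streaming sketch: the endpoint URL is a placeholder for a running TGI server, and AsyncInferenceClient comes from huggingface_hub.

import asyncio

from huggingface_hub import AsyncInferenceClient

from outlines.models.tgi import AsyncTGI

async def main():
    client = AsyncInferenceClient("http://localhost:8080")
    model = AsyncTGI(client)

    # Stream chunks of text as the server produces them.
    async for chunk in model.generate_stream(
        "Write a one-sentence summary of TGI.", max_new_tokens=64
    ):
        print(chunk, end="")

asyncio.run(main())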

__init__(client)

Parameters:

Name Type Description Default
client

A huggingface AsyncInferenceClient client instance.

required
Source code in outlines/models/tgi.py
def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `AsyncInferenceClient` client instance.

    """
    self.client = client
    self.type_adapter = TGITypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/tgi.py
async def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response = await self.client.text_generation(**client_args)

    return response

generate_stream(model_input, output_type=None, **inference_kwargs) async

Stream text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py
async def generate_stream( # type: ignore
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = await self.client.text_generation(
        **client_args, stream=True
    )

    async for chunk in stream:  # pragma: no cover
        yield chunk

TGI

Bases: Model

Thin wrapper around a huggingface_hub.InferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.InferenceClient client.

Source code in outlines/models/tgi.py
class TGI(Model):
    """Thin wrapper around a `huggingface_hub.InferenceClient` client used to
    communicate with a `TGI` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the
    `huggingface_hub.InferenceClient` client.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `InferenceClient` client instance.

        """
        self.client = client
        self.type_adapter = TGITypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        return self.client.text_generation(**client_args)

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = self.client.text_generation(
            **client_args, stream=True,
        )

        for chunk in stream:  # pragma: no cover
            yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the TGI client."""
        prompt = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        client_args = {
            "prompt": prompt,
            **inference_kwargs,
        }

        return client_args

__init__(client)

Parameters:

Name Type Description Default
client

A huggingface InferenceClient client instance.

required
Source code in outlines/models/tgi.py
def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `InferenceClient` client instance.

    """
    self.client = client
    self.type_adapter = TGITypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/tgi.py
def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    return self.client.text_generation(**client_args)

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py
def generate_stream(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = self.client.text_generation(
        **client_args, stream=True,
    )

    for chunk in stream:  # pragma: no cover
        yield chunk

TGITypeAdapter

Bases: ModelTypeAdapter

Type adapter for the TGI and AsyncTGI models.

Source code in outlines/models/tgi.py
class TGITypeAdapter(ModelTypeAdapter):
    """Type adapter for the `TGI` and `AsyncTGI` models."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the client.

        Argument
        --------
        model_input
            The input passed by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {model_input} is not available with TGI. "
            + "Please provide a string."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        return model_input

    @format_input.register(list)
    def format_list_input(self, model_input: list):
        raise NotImplementedError("TGI does not support batch inference.")

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the client.

        Argument
        --------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The structured output argument to pass to the client.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            raise NotImplementedError(
                "TGI does not support CFG-based structured outputs."
            )
        elif isinstance(term, JsonSchema):
            return {
                "grammar": {
                    "type": "json",
                    "value": json.loads(term.schema),
                }
            }
        else:
            return {
                "grammar": {
                    "type": "regex",
                    "value": to_regex(term),
                }
            }

format_input(model_input)

Generate the prompt argument to pass to the client.

Argument

model_input The input passed by the user.

Returns:

Type Description
str

The formatted input to be passed to the model.

Source code in outlines/models/tgi.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the client.

    Argument
    --------
    model_input
        The input passed by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {model_input} is not available with TGI. "
        + "Please provide a string."
    )

format_output_type(output_type=None)

Generate the structured output argument to pass to the client.

Argument

output_type The structured output type provided.

Returns:

Type Description
dict

The structured output argument to pass to the client.

Source code in outlines/models/tgi.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the client.

    Argument
    --------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The structured output argument to pass to the client.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        raise NotImplementedError(
            "TGI does not support CFG-based structured outputs."
        )
    elif isinstance(term, JsonSchema):
        return {
            "grammar": {
                "type": "json",
                "value": json.loads(term.schema),
            }
        }
    else:
        return {
            "grammar": {
                "type": "regex",
                "value": to_regex(term),
            }
        }

from_tgi(client)

Create an Outlines TGI or AsyncTGI model instance from a huggingface_hub.InferenceClient or huggingface_hub.AsyncInferenceClient instance.

Parameters:

Name Type Description Default
client Union[InferenceClient, AsyncInferenceClient]

A huggingface_hub.InferenceClient or huggingface_hub.AsyncInferenceClient instance.

required

Returns:

Type Description
Union[TGI, AsyncTGI]

An Outlines TGI or AsyncTGI model instance.

Source code in outlines/models/tgi.py
def from_tgi(
    client: Union["InferenceClient", "AsyncInferenceClient"],
) -> Union[TGI, AsyncTGI]:
    """Create an Outlines `TGI` or `AsyncTGI` model instance from a
    `huggingface_hub.InferenceClient` or `huggingface_hub.AsyncInferenceClient`
    instance.

    Parameters
    ----------
    client
        A `huggingface_hub.InferenceClient` or
        `huggingface_hub.AsyncInferenceClient` instance.

    Returns
    -------
    Union[TGI, AsyncTGI]
        An Outlines `TGI` or `AsyncTGI` model instance.

    """
    from huggingface_hub import AsyncInferenceClient, InferenceClient

    if isinstance(client, InferenceClient):
        return TGI(client)
    elif isinstance(client, AsyncInferenceClient):
        return AsyncTGI(client)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            + "Please provide a HuggingFace InferenceClient "
            + "or AsyncInferenceClient instance."
        )
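
Hypothetical synchronous usage sketch with a placeholder endpoint URL; the Pydantic model is converted to a TGI "json" grammar by TGITypeAdapter.

from huggingface_hub import InferenceClient
from pydantic import BaseModel

from outlines.models.tgi import from_tgi

class Person(BaseModel):
    name: str
    age: int

client = InferenceClient("http://localhost:8080")
model = from_tgi(client)

text = model.generate(
    "Extract the person: John is 32 years old.", Person, max_new_tokens=64
)
print(text)  # a JSON string matching the Person schema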

tokenizer

Tokenizer

Bases: Hashable, Protocol

Source code in outlines/models/tokenizer.py
class Tokenizer(Hashable, Protocol):
    eos_token: str
    eos_token_id: int
    pad_token_id: int
    vocabulary: Dict[str, int]
    special_tokens: Set[str]

    def encode(
        self, prompt: Union[str, List[str]]
    ) -> "Tuple['NDArray[np.int64]', 'NDArray[np.int64]']":
        """Translate the input prompts into arrays of token ids and attention mask."""
        ...

    def decode(self, token_ids: "NDArray[np.int64]") -> List[str]:
        """Translate an array of token ids to a string or list of strings."""
        ...

    def convert_token_to_string(self, token: str) -> str:
        """Convert a token to its equivalent string.

        This is for instance useful for BPE tokenizers where whitespaces are
        represented by the special character `Ġ`. This prevents matching a raw
        token that includes `Ġ` with a string.
        """
        ...

convert_token_to_string(token)

Convert a token to its equivalent string.

This is for instance useful for BPE tokenizers where whitespaces are represented by the special character Ġ. This prevents matching a raw token that includes Ġ with a string.

Source code in outlines/models/tokenizer.py
def convert_token_to_string(self, token: str) -> str:
    """Convert a token to its equivalent string.

    This is for instance useful for BPE tokenizers where whitespaces are
    represented by the special character `Ġ`. This prevents matching a raw
    token that includes `Ġ` with a string.
    """
    ...

decode(token_ids)

Translate an array of token ids to a string or list of strings.

Source code in outlines/models/tokenizer.py
def decode(self, token_ids: "NDArray[np.int64]") -> List[str]:
    """Translate an array of token ids to a string or list of strings."""
    ...

encode(prompt)

Translate the input prompts into arrays of token ids and attention mask.

Source code in outlines/models/tokenizer.py
def encode(
    self, prompt: Union[str, List[str]]
) -> "Tuple['NDArray[np.int64]', 'NDArray[np.int64]']":
    """Translate the input prompts into arrays of token ids and attention mask."""
    ...
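
A toy structural implementation of the protocol (purely illustrative, not from the library), using a whitespace vocabulary; real tokenizers are considerably more involved.

from typing import Dict, List, Set, Tuple, Union

import numpy as np
from numpy.typing import NDArray

class WhitespaceTokenizer:
    eos_token: str = "<eos>"
    eos_token_id: int = 0
    pad_token_id: int = 0
    vocabulary: Dict[str, int] = {"<eos>": 0, "hello": 1, "world": 2}
    special_tokens: Set[str] = {"<eos>"}

    def encode(
        self, prompt: Union[str, List[str]]
    ) -> Tuple[NDArray[np.int64], NDArray[np.int64]]:
        prompts = [prompt] if isinstance(prompt, str) else prompt
        ids = [
            [self.vocabulary.get(tok, self.eos_token_id) for tok in p.split()]
            for p in prompts
        ]
        # Left-pad to a rectangular array and mask the padding positions.
        width = max(len(row) for row in ids)
        padded = [[self.pad_token_id] * (width - len(row)) + row for row in ids]
        token_ids = np.array(padded, dtype=np.int64)
        attention_mask = np.array(
            [[0] * (width - len(row)) + [1] * len(row) for row in ids],
            dtype=np.int64,
        )
        return token_ids, attention_mask

    def decode(self, token_ids: NDArray[np.int64]) -> List[str]:
        reverse = {v: k for k, v in self.vocabulary.items()}
        return [" ".join(reverse[int(i)] for i in row) for row in token_ids]

    def convert_token_to_string(self, token: str) -> str:
        return token

    def __hash__(self) -> int:
        return hash(tuple(sorted(self.vocabulary.items())))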

transformers

Integration with the transformers library.

TransformerTokenizer

Bases: Tokenizer

Represents a tokenizer for models in the transformers library.

Source code in outlines/models/transformers.py
class TransformerTokenizer(Tokenizer):
    """Represents a tokenizer for models in the `transformers` library."""

    def __init__(self, tokenizer: "PreTrainedTokenizer", **kwargs):
        self.tokenizer = tokenizer
        self.eos_token_id = self.tokenizer.eos_token_id
        self.eos_token = self.tokenizer.eos_token

        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
            self.pad_token_id = self.eos_token_id
        else:
            self.pad_token_id = self.tokenizer.pad_token_id
            self.pad_token = self.tokenizer.pad_token

        self.special_tokens = set(self.tokenizer.all_special_tokens)

        self.vocabulary = self.tokenizer.get_vocab()
        self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types())

    def encode(
        self, prompt: Union[str, List[str]], **kwargs
    ) -> Tuple["torch.LongTensor", "torch.LongTensor"]:
        kwargs["padding"] = True
        kwargs["return_tensors"] = "pt"
        output = self.tokenizer(prompt, **kwargs)
        return output["input_ids"], output["attention_mask"]

    def decode(self, token_ids: "torch.LongTensor") -> List[str]:
        text = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        return text

    def convert_token_to_string(self, token: str) -> str:
        from transformers.file_utils import SPIECE_UNDERLINE

        string = self.tokenizer.convert_tokens_to_string([token])

        if self.is_llama:
            # A hack to handle missing spaces in HF's Llama tokenizers
            if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
                return " " + string

        return string

    def __eq__(self, other):
        if isinstance(other, type(self)):
            if hasattr(self, "model_name") and hasattr(self, "kwargs"):
                return (
                    other.model_name == self.model_name and other.kwargs == self.kwargs
                )
            else:
                return other.tokenizer == self.tokenizer
        return NotImplemented

    def __hash__(self):
        from datasets.fingerprint import Hasher

        return hash(Hasher.hash(self.tokenizer))

    def __getstate__(self):
        state = {"tokenizer": self.tokenizer}
        return state

    def __setstate__(self, state):
        self.__init__(state["tokenizer"])

Transformers

Bases: Model

Thin wrapper around a transformers model and a transformers tokenizer.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and tokenizer.

Source code in outlines/models/transformers.py
class Transformers(Model):
    """Thin wrapper around a `transformers` model and a `transformers`
    tokenizer.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    tokenizer.

    """

    def __init__(
        self,
        model: "PreTrainedModel",
        tokenizer: "PreTrainedTokenizer",
    ):
        """
        Parameters
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        tokenizer
            A `PreTrainedTokenizer`, or any tokenizer that is compatible with
            the `transformers` API for tokenizers.

        """
        # We need to handle the cases in which jax/flax or tensorflow
        # is not available in the environment.
        try:
            from transformers import FlaxPreTrainedModel
        except ImportError:  # pragma: no cover
            FlaxPreTrainedModel = None

        try:
            from transformers import TFPreTrainedModel
        except ImportError:  # pragma: no cover
            TFPreTrainedModel = None

        tokenizer.padding_side = "left"
        self.model = model
        self.transformer_tokenizer = tokenizer
        self.tokenizer = TransformerTokenizer(tokenizer)
        self.type_adapter = TransformersTypeAdapter()

        if (
            FlaxPreTrainedModel is not None
            and isinstance(model, FlaxPreTrainedModel)
        ):
            self.tensor_library_name = "jax"
        elif (
            TFPreTrainedModel is not None
            and isinstance(model, TFPreTrainedModel)
        ):
            self.tensor_library_name = "tensorflow"
        else:
            self.tensor_library_name = "torch"

    def _prepare_model_inputs(
        self,
        model_input: Union[str, List[str], dict],
        output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        prompts = self.type_adapter.format_input(model_input)
        input_ids, attention_mask = self.tokenizer.encode(prompts)
        inputs = {
            "input_ids": input_ids.to(self.model.device),
            "attention_mask": attention_mask.to(self.model.device),
        }

        return prompts, inputs

    def generate(
        self,
        model_input: Union[str, List[str], dict],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str]]:
        """Generate text using `transformers`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response. For
            multi-modal models, the input should be a dictionary containing the
            `text` key with a value of type `Union[str, List[str]]` and the
            other keys required by the model.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            of the `transformers` model.

        Returns
        -------
        Union[str, List[str]]
            The text generated by the model.

        """
        prompts, inputs = self._prepare_model_inputs(model_input, output_type)
        logits_processor = self.type_adapter.format_output_type(output_type)

        generated_ids = self._generate_output_seq(
            prompts, inputs, logits_processor=logits_processor, **inference_kwargs
        )

        # if single str input, convert to a 1D output
        if isinstance(prompts, str):
            generated_ids = generated_ids.squeeze(0)

        return self._decode_generation(generated_ids)

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `transformers` models.

        TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

        """
        raise NotImplementedError(
            "Streaming is not implemented for Transformers models."
        )

    def _generate_output_seq(self, prompts, inputs, **inference_kwargs):
        input_ids = inputs["input_ids"]
        output_ids = self.model.generate(
            **inputs,
            tokenizer=self.transformer_tokenizer,
            **inference_kwargs,
        )

        # encoder-decoder returns output_ids only, decoder-only returns full seq ids
        if self.model.config.is_encoder_decoder:
            generated_ids = output_ids
        else:
            generated_ids = output_ids[:, input_ids.shape[1] :]

        # if batch list inputs AND multiple samples per input, convert generated_id to 3D view
        num_samples = inference_kwargs.get("num_return_sequences", 1)
        if num_samples > 1 and isinstance(prompts, list):
            batch_size = input_ids.size(0)
            generated_ids = generated_ids.view(batch_size, num_samples, -1)

        return generated_ids

    def _decode_generation(self, generated_ids: "torch.Tensor"):
        if len(generated_ids.shape) == 1:
            return self.tokenizer.decode([generated_ids])[0]
        elif len(generated_ids.shape) == 2:
            return self.tokenizer.decode(generated_ids)
        elif len(generated_ids.shape) == 3:
            return [
                self.tokenizer.decode(generated_ids[i])
                for i in range(len(generated_ids))
            ]
        else:  # pragma: no cover
            raise TypeError(
                f"Generated outputs aren't 1D, 2D or 3D, but instead are {generated_ids.shape}"
            )

__init__(model, tokenizer)

Parameters:

Name Type Description Default
model PreTrainedModel

A PreTrainedModel, or any model that is compatible with the transformers API for models.

required
tokenizer PreTrainedTokenizer

A PreTrainedTokenizer, or any tokenizer that is compatible with the transformers API for tokenizers.

required

Source code in outlines/models/transformers.py
def __init__(
    self,
    model: "PreTrainedModel",
    tokenizer: "PreTrainedTokenizer",
):
    """
    Parameters
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    tokenizer
        A `PreTrainedTokenizer`, or any tokenizer that is compatible with
        the `transformers` API for tokenizers.

    """
    # We need to handle the cases in which jax/flax or tensorflow
    # is not available in the environment.
    try:
        from transformers import FlaxPreTrainedModel
    except ImportError:  # pragma: no cover
        FlaxPreTrainedModel = None

    try:
        from transformers import TFPreTrainedModel
    except ImportError:  # pragma: no cover
        TFPreTrainedModel = None

    tokenizer.padding_side = "left"
    self.model = model
    self.transformer_tokenizer = tokenizer
    self.tokenizer = TransformerTokenizer(tokenizer)
    self.type_adapter = TransformersTypeAdapter()

    if (
        FlaxPreTrainedModel is not None
        and isinstance(model, FlaxPreTrainedModel)
    ):
        self.tensor_library_name = "jax"
    elif (
        TFPreTrainedModel is not None
        and isinstance(model, TFPreTrainedModel)
    ):
        self.tensor_library_name = "tensorflow"
    else:
        self.tensor_library_name = "torch"

generate(model_input, output_type=None, **inference_kwargs)

Generate text using transformers.

Parameters:

Name Type Description Default
model_input Union[str, List[str], dict]

The prompt based on which the model will generate a response. For multi-modal models, the input should be a dictionary containing the text key with a value of type Union[str, List[str]] and the other keys required by the model.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
inference_kwargs Any

Additional keyword arguments to pass to the generate method of the transformers model.

{}

Returns:

Type Description
Union[str, List[str]]

The text generated by the model.

Source code in outlines/models/transformers.py
def generate(
    self,
    model_input: Union[str, List[str], dict],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Generate text using `transformers`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response. For
        multi-modal models, the input should be a dictionary containing the
        `text` key with a value of type `Union[str, List[str]]` and the
        other keys required by the model.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        of the `transformers` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.

    """
    prompts, inputs = self._prepare_model_inputs(model_input, output_type)
    logits_processor = self.type_adapter.format_output_type(output_type)

    generated_ids = self._generate_output_seq(
        prompts, inputs, logits_processor=logits_processor, **inference_kwargs
    )

    # if single str input, convert to a 1D output
    if isinstance(prompts, str):
        generated_ids = generated_ids.squeeze(0)

    return self._decode_generation(generated_ids)

generate_stream(model_input, output_type, **inference_kwargs)

Not available for transformers models.

TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

Source code in outlines/models/transformers.py
def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Not available for `transformers` models.

    TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

    """
    raise NotImplementedError(
        "Streaming is not implemented for Transformers models."
    )

TransformersMultiModal

Bases: Transformers

Thin wrapper around a transformers model and a transformers processor.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and processor.

Source code in outlines/models/transformers.py
class TransformersMultiModal(Transformers):
    """Thin wrapper around a `transformers` model and a `transformers`
    processor.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    processor.

    """

    def __init__(self, model: "PreTrainedModel", processor):
        """Create a TransformersMultiModal model instance

        We rely on the `__init__` method of the `Transformers` class to handle
        most of the initialization and then add elements specific to vision
        models.

        Parameters
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        processor
            A `ProcessorMixin` instance.

        """
        self.processor = processor
        self.processor.padding_side = "left"
        self.processor.pad_token = "[PAD]"

        tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer

        super().__init__(model, tokenizer)

        self.type_adapter = TransformersMultiModalTypeAdapter()

    def _prepare_model_inputs(
        self,
        model_input: Union[str, List[str], dict],
        output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        model_input = self.type_adapter.format_input(model_input)
        inputs = self.processor(
            **model_input, padding=True, return_tensors="pt"
        ).to(self.model.device)

        return model_input["text"], inputs

__init__(model, processor)

Create a TransformersMultiModal model instance

We rely on the __init__ method of the Transformers class to handle most of the initialization and then add elements specific to vision models.

Parameters:

Name Type Description Default
model PreTrainedModel

A PreTrainedModel, or any model that is compatible with the transformers API for models.

required
processor

A ProcessorMixin instance.

required
Source code in outlines/models/transformers.py
def __init__(self, model: "PreTrainedModel", processor):
    """Create a TransformersMultiModal model instance

    We rely on the `__init__` method of the `Transformers` class to handle
    most of the initialization and then add elements specific to vision
    models.

    Parameters
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    processor
        A `ProcessorMixin` instance.

    """
    self.processor = processor
    self.processor.padding_side = "left"
    self.processor.pad_token = "[PAD]"

    tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer

    super().__init__(model, tokenizer)

    self.type_adapter = TransformersMultiModalTypeAdapter()

TransformersMultiModalTypeAdapter

Bases: ModelTypeAdapter

Type adapter for TransformersMultiModal model.

Source code in outlines/models/transformers.py
class TransformersMultiModalTypeAdapter(ModelTypeAdapter):
    """Type adapter for `TransformersMultiModal` model."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt arguments to pass to the model.

        Argument
        --------
        model_input
            The input passed by the user.

        Returns
        -------
        dict
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {model_input} is not available. Please provide a "
            + "dictionary containing at least the 'text' key with a value "
            + "of type Union[str, List[str]]. You should also include the "
            + "other keys required by your processor (for instance, 'images' "
            + "or 'audios'). "
            + "Make sure that the text is correctly formatted for the model "
            + "(e.g. include <image> or <|AUDIO|> tags) and that the number "
            + "of text tags matches the number of additional assets provided."
        )

    @format_input.register(dict)
    def format_list_input(self, model_input: dict) -> dict:
        if "text" not in model_input:
            raise ValueError(
                "The input must contain the 'text' key along with the other "
                + "keys required by your processor."
            )
        return model_input

    def format_output_type(
        self,
        output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional["LogitsProcessorList"]:
        """Generate the logits processor argument to pass to the model.

        Argument
        --------
        output_type
            The logits processor provided.

        Returns
        -------
        Optional[LogitsProcessorList]
            The logits processor to pass to the model.

        """
        from transformers import LogitsProcessorList

        if output_type is not None:
            return LogitsProcessorList([output_type])
        return None

format_input(model_input)

Generate the prompt arguments to pass to the model.

Argument

model_input The input passed by the user.

Returns:

Type Description
dict

The formatted input to be passed to the model.

Source code in outlines/models/transformers.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt arguments to pass to the model.

    Argument
    --------
    model_input
        The input passed by the user.

    Returns
    -------
    dict
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {model_input} is not available. Please provide a "
        + "dictionary containing at least the 'text' key with a value "
        + "of type Union[str, List[str]]. You should also include the "
        + "other keys required by your processor (for instance, 'images' "
        + "or 'audios'). "
        + "Make sure that the text is correctly formatted for the model "
        + "(e.g. include <image> or <|AUDIO|> tags) and that the number "
        + "of text tags matches the number of additional assets provided."
    )

format_output_type(output_type=None)

Generate the logits processor argument to pass to the model.

Argument

output_type The logits processor provided.

Returns:

Type Description
Optional[LogitsProcessorList]

The logits processor to pass to the model.

Source code in outlines/models/transformers.py
def format_output_type(
    self,
    output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional["LogitsProcessorList"]:
    """Generate the logits processor argument to pass to the model.

    Argument
    --------
    output_type
        The logits processor provided.

    Returns
    -------
    Optional[LogitsProcessorList]
        The logits processor to pass to the model.

    """
    from transformers import LogitsProcessorList

    if output_type is not None:
        return LogitsProcessorList([output_type])
    return None

TransformersTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Transformers model.

Source code in outlines/models/transformers.py
class TransformersTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Transformers` model."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input passed by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {model_input} is not available. "
            "Please use a string or a list of strings."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        return model_input

    @format_input.register(list)
    def format_list_input(self, model_input: List[str]) -> List[str]:
        return model_input

    def format_output_type(
        self,
        output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional["LogitsProcessorList"]:
        """Generate the logits processor argument to pass to the model.

        Parameters
        ----------
        output_type
            The logits processor provided.

        Returns
        -------
        Optional[LogitsProcessorList]
            The logits processor to pass to the model.

        """
        from transformers import LogitsProcessorList

        if output_type is not None:
            return LogitsProcessorList([output_type])
        return None

format_input(model_input)

Generate the prompt argument to pass to the model.

Parameters:

Name Type Description Default
model_input

The input passed by the user.

required

Returns:

Type Description
str

The formatted input to be passed to the model.

Source code in outlines/models/transformers.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {model_input} is not available. "
        "Please use a string or a list of strings."
    )

format_output_type(output_type=None)

Generate the logits processor argument to pass to the model.

Parameters:

Name Type Description Default
output_type Optional[OutlinesLogitsProcessor]

The logits processor provided.

None

Returns:

Type Description
Optional[LogitsProcessorList]

The logits processor to pass to the model.

Source code in outlines/models/transformers.py
def format_output_type(
    self,
    output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional["LogitsProcessorList"]:
    """Generate the logits processor argument to pass to the model.

    Parameters
    ----------
    output_type
        The logits processor provided.

    Returns
    -------
    Optional[LogitsProcessorList]
        The logits processor to pass to the model.

    """
    from transformers import LogitsProcessorList

    if output_type is not None:
        return LogitsProcessorList([output_type])
    return None

from_transformers(model, tokenizer_or_processor)

Create an Outlines Transformers or TransformersMultiModal model instance from a PreTrainedModel instance and a PreTrainedTokenizer or ProcessorMixin instance.

outlines supports PreTrainedModelForCausalLM, PreTrainedMambaForCausalLM, PreTrainedModelForSeq2Seq and any model that implements the transformers model API.

Parameters:

Name Type Description Default
model PreTrainedModel

A transformers.PreTrainedModel instance.

required
tokenizer_or_processor Union[PreTrainedTokenizer, ProcessorMixin]

A transformers.PreTrainedTokenizer or transformers.ProcessorMixin instance.

required

Returns:

Type Description
Union[Transformers, TransformersMultiModal]

An Outlines Transformers or TransformersMultiModal model instance.

Source code in outlines/models/transformers.py
def from_transformers(
    model: "PreTrainedModel",
    tokenizer_or_processor: Union["PreTrainedTokenizer", "ProcessorMixin"],
) -> Union[Transformers, TransformersMultiModal]:
    """Create an Outlines `Transformers` or `TransformersMultiModal` model
    instance from a `PreTrainedModel` instance and a `PreTrainedTokenizer` or
    `ProcessorMixin` instance.

    `outlines` supports `PreTrainedModelForCausalLM`,
    `PreTrainedMambaForCausalLM`, `PreTrainedModelForSeq2Seq` and any model
    that implements the `transformers` model API.

    Parameters
    ----------
    model
        A `transformers.PreTrainedModel` instance.
    tokenizer_or_processor
        A `transformers.PreTrainedTokenizer` or
        `transformers.ProcessorMixin` instance.

    Returns
    -------
    Union[Transformers, TransformersMultiModal]
        An Outlines `Transformers` or `TransformersMultiModal` model instance.

    """
    from transformers import (
        PreTrainedTokenizer, PreTrainedTokenizerFast, ProcessorMixin)

    if isinstance(
        tokenizer_or_processor, (PreTrainedTokenizer, PreTrainedTokenizerFast)
    ):
        tokenizer = tokenizer_or_processor
        return Transformers(model, tokenizer)
    elif isinstance(tokenizer_or_processor, ProcessorMixin):
        processor = tokenizer_or_processor
        return TransformersMultiModal(model, processor)
    else:
        raise ValueError(
            "We could not determine whether the model passed to "
            + "`from_transformers` is a text-to-text or a multi-modal model. "
            + "Please provide a transformers tokenizer or processor."
        )
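
Hypothetical usage sketch: the checkpoint name is a placeholder for any causal language model available through transformers.

from transformers import AutoModelForCausalLM, AutoTokenizer

from outlines.models.transformers import from_transformers

model_id = "HuggingFaceTB/SmolLM2-135M-Instruct"  # placeholder checkpoint
model = from_transformers(
    AutoModelForCausalLM.from_pretrained(model_id),
    AutoTokenizer.from_pretrained(model_id),
)

# Plain text generation; at this level `output_type` expects an Outlines
# logits processor, so structured output is typically driven through
# higher-level Outlines APIs.
print(model.generate("Give me a haiku about autumn.", max_new_tokens=32))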

get_llama_tokenizer_types()

Get all the Llama tokenizer types/classes that need work-arounds.

When they can't be imported, a dummy class is created.

Source code in outlines/models/transformers.py
def get_llama_tokenizer_types():
    """Get all the Llama tokenizer types/classes that need work-arounds.

    When they can't be imported, a dummy class is created.

    """
    try:
        from transformers.models.llama import LlamaTokenizer
    except ImportError:  # pragma: no cover

        class LlamaTokenizer:  # type: ignore
            pass

    try:
        from transformers.models.llama import LlamaTokenizerFast
    except ImportError:  # pragma: no cover

        class LlamaTokenizerFast:  # type: ignore
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizer
    except ImportError:  # pragma: no cover

        class CodeLlamaTokenizer:  # type: ignore
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizerFast
    except ImportError:  # pragma: no cover

        class CodeLlamaTokenizerFast:  # type: ignore
            pass

    return (
        LlamaTokenizer,
        LlamaTokenizerFast,
        CodeLlamaTokenizer,
        CodeLlamaTokenizerFast,
    )

vllm

Integration with a vLLM server.

AsyncVLLM

Bases: AsyncModel

Thin async wrapper around the openai.AsyncOpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.AsyncOpenAI client for the vllm server.

Source code in outlines/models/vllm.py
class AsyncVLLM(AsyncModel):
    """Thin async wrapper around the `openai.AsyncOpenAI` client used to
    communicate with a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.AsyncOpenAI` client for
    the `vllm` server.
    """

    def __init__(
        self,
        client: "AsyncOpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    async def generate(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        response = await self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_stream( # type: ignore
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.
        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )

        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            **messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args
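
For illustration, a minimal async usage sketch; the base URL, API key and model name are placeholders for your own vLLM server:

import asyncio

import openai
import outlines

async_client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
model = outlines.from_vllm(async_client, "Qwen/Qwen2.5-0.5B-Instruct")

async def main():
    # Extra keyword arguments such as `max_tokens` are forwarded to
    # `chat.completions.create` through `**inference_kwargs`.
    result = await model.generate("Write a haiku about the sea.", max_tokens=64)
    print(result)

asyncio.run(main())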

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client AsyncOpenAI

An openai.AsyncOpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/vllm.py
def __init__(
    self,
    client: "AsyncOpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using vLLM.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/vllm.py
async def generate(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response = await self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The vLLM server refused to answer the request: "
                f"{message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs) async

Stream text using vLLM.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py
async def generate_stream( # type: ignore
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.
    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = await self.client.chat.completions.create(
        **client_args,
        stream=True,
    )

    async for chunk in stream:  # pragma: no cover
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content
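
A brief streaming sketch under the same assumptions as the example above:

async def stream_demo():
    # `generate_stream` returns an async iterator of text chunks.
    async for chunk in model.generate_stream("Tell me a short story.", max_tokens=128):
        print(chunk, end="")

asyncio.run(stream_demo())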

VLLM

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the vllm server.

Source code in outlines/models/vllm.py
class VLLM(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    `vllm` server.
    """

    def __init__(
        self,
        client: "OpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    def generate(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        response = self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_stream(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = self.client.chat.completions.create(
            **client_args, stream=True,
        )

        for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[str, Vision],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            **messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args
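
For illustration, a minimal synchronous sketch that requests structured output; the base URL and model name are placeholders, and it assumes your vLLM server uses a structured generation backend that supports JSON schemas:

import openai
import outlines
from pydantic import BaseModel

class City(BaseModel):
    name: str
    population: int

client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
model = outlines.from_vllm(client, "Qwen/Qwen2.5-0.5B-Instruct")

# The Pydantic model is converted into a `guided_json` entry of `extra_body`
# by the VLLMTypeAdapter.
result = model.generate("Describe Paris as a JSON object.", City, max_tokens=128)
print(result)  # e.g. '{"name": "Paris", "population": 2148000}'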

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client OpenAI

An openai.OpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/vllm.py
def __init__(
    self,
    client: "OpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using vLLM.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/vllm.py
def generate(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    response = self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The vLLM server refused to answer the request: "
                f"{message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using vLLM.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py
def generate_stream(
    self,
    model_input: Union[str, Vision],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = self.client.chat.completions.create(
        **client_args, stream=True,
    )

    for chunk in stream:  # pragma: no cover
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

VLLMTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the VLLM and AsyncVLLM models.

Source code in outlines/models/vllm.py
class VLLMTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `VLLM` and `AsyncVLLM` models."""

    def format_input(self, model_input: Union[str, Vision]) -> dict:
        """Generate the prompt argument to pass to the client.

        We rely on the OpenAITypeAdapter to format the input as the vLLM server
        expects input in the same format as OpenAI.

        Parameters
        ----------
        model_input
            The input passed by the user.

        Returns
        -------
        dict
            The formatted input to be passed to the model.

        """
        return OpenAITypeAdapter().format_input(model_input)

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the client.

        Parameters
        ----------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The structured output argument to pass to the model.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            return {"guided_grammar": term.definition}
        elif isinstance(term, JsonSchema):
            extra_body = {"guided_json": json.loads(term.schema)}
            if term.whitespace_pattern:
                extra_body["whitespace_pattern"] = term.whitespace_pattern
            return extra_body
        else:
            return {"guided_regex": to_regex(term)}

format_input(model_input)

Generate the prompt argument to pass to the client.

We rely on the OpenAITypeAdapter to format the input as the vLLM server expects input in the same format as OpenAI.

Parameters:

Name Type Description Default
model_input Union[str, Vision]

The input passed by the user.

required

Returns:

Type Description
dict

The formatted input to be passed to the model.

Source code in outlines/models/vllm.py
def format_input(self, model_input: Union[str, Vision]) -> dict:
    """Generate the prompt argument to pass to the client.

    We rely on the OpenAITypeAdapter to format the input as the vLLM server
    expects input in the same format as OpenAI.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    dict
        The formatted input to be passed to the model.

    """
    return OpenAITypeAdapter().format_input(model_input)

format_output_type(output_type=None)

Generate the structured output argument to pass to the client.

Parameters:

Name Type Description Default
output_type Optional[Any]

The structured output type provided.

None

Returns:

Type Description
dict

The structured output argument to pass to the model.

Source code in outlines/models/vllm.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the client.

    Parameters
    ----------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The structured output argument to pass to the model.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        return {"guided_grammar": term.definition}
    elif isinstance(term, JsonSchema):
        extra_body = {"guided_json": json.loads(term.schema)}
        if term.whitespace_pattern:
            extra_body["whitespace_pattern"] = term.whitespace_pattern
        return extra_body
    else:
        return {"guided_regex": to_regex(term)}

from_vllm(client, model_name=None)

Create an Outlines VLLM or AsyncVLLM model instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name Type Description Default
client Union[OpenAI, AsyncOpenAI]

An openai.OpenAI or openai.AsyncOpenAI instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Union[VLLM, AsyncVLLM]

An Outlines VLLM or AsyncVLLM model instance.

Source code in outlines/models/vllm.py
def from_vllm(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[VLLM, AsyncVLLM]:
    """Create an Outlines `VLLM` or `AsyncVLLM` model instance from an
    `openai.OpenAI` or `openai.AsyncOpenAI` instance.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[VLLM, AsyncVLLM]
        An Outlines `VLLM` or `AsyncVLLM` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    if isinstance(client, OpenAI):
        return VLLM(client, model_name)
    elif isinstance(client, AsyncOpenAI):
        return AsyncVLLM(client, model_name)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            "Please provide an OpenAI or AsyncOpenAI instance."
        )
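
For example, the factory dispatches on the client type; the URLs and model name are placeholders:

import openai
import outlines

sync_model = outlines.from_vllm(
    openai.OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed"),
    "Qwen/Qwen2.5-0.5B-Instruct",
)  # returns a VLLM instance

async_model = outlines.from_vllm(
    openai.AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="not-needed"),
    "Qwen/Qwen2.5-0.5B-Instruct",
)  # returns an AsyncVLLM instance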

vllm_offline

Integration with the vllm library (offline mode).

VLLMOffline

Bases: Model

Thin wrapper around a vllm.LLM model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the vllm.LLM model.

Source code in outlines/models/vllm_offline.py
class VLLMOffline(Model):
    """Thin wrapper around a `vllm.LLM` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `vllm.LLM` model.

    """

    def __init__(self, model: "LLM"):
        """Create a VLLM model instance.

        Parameters
        ----------
        model
            A `vllm.LLM` model instance.

        """
        self.model = model
        self.type_adapter = VLLMOfflineTypeAdapter()
        self.lora_request = None # v0 legacy, to be removed

    def generate(
        self,
        model_input: Union[str, List[str]],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str], List[List[str]]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            in the `vllm.LLM` model.

        Returns
        -------
        Union[str, List[str], List[List[str]]]
            The text generated by the model.

        """
        from vllm.sampling_params import GuidedDecodingParams, SamplingParams

        sampling_params = inference_kwargs.pop("sampling_params", None)

        if sampling_params is None:
            sampling_params = SamplingParams()

        output_type_args = self.type_adapter.format_output_type(output_type)
        if output_type_args:
            sampling_params.guided_decoding = GuidedDecodingParams(**output_type_args)

        results = self.model.generate(
            self.type_adapter.format_input(model_input),
            sampling_params=sampling_params,
            lora_request=self.lora_request, # v0 legacy, to be removed
            **inference_kwargs,
        )
        results = [[sample.text for sample in batch.outputs] for batch in results]

        batch_size = len(results)
        sample_size = len(results[0])

        if batch_size == 1 and sample_size == 1:
            return results[0][0]
        elif batch_size == 1:
            return results[0]
        elif sample_size == 1:
            return [batch[0] for batch in results]

        return results

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `vllm.LLM`.

        TODO: Implement the streaming functionality ourselves.

        """
        raise NotImplementedError(
            "Streaming is not available for the vLLM integration."
        )

    def load_lora(self, adapter_path: Optional[str]) -> None:
        """Load a LoRA adapter. Deprecated since v1.0.0.

        Use the `lora_request` argument when calling the model or generator
        instead.

        """
        warnings.warn("""
            The `load_lora` method is deprecated starting from v1.0.0.
            Support for it will be removed in v1.1.0.
            Please use v1 of the `outlines` library by using the
            `outlines.from_vllm` function to create a `VLLM` model
            instance.
            In v1, you must pass the `lora_request` argument as
            a keyword argument when calling the model or generator.
            """)

        from vllm.lora.request import LoRARequest

        if adapter_path is None:
            self.lora_request = None
        else:
            self.lora_request = LoRARequest(adapter_path, 1, adapter_path)
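
For illustration, a minimal offline sketch, assuming the factory documented below is exposed as `outlines.from_vllm_offline`; the checkpoint name is a placeholder and requires vllm to be installed with the weights available locally or on the Hub:

import outlines
from vllm import LLM
from vllm.sampling_params import SamplingParams

llm = LLM("Qwen/Qwen2.5-0.5B-Instruct")
model = outlines.from_vllm_offline(llm)

# `sampling_params` is popped from the inference kwargs and passed to `LLM.generate`.
answer = model.generate(
    "What is the capital of France?",
    sampling_params=SamplingParams(max_tokens=32),
)
print(answer)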

__init__(model)

Create a VLLM model instance.

Parameters:

Name Type Description Default
model LLM

A vllm.LLM model instance.

required
Source code in outlines/models/vllm_offline.py
def __init__(self, model: "LLM"):
    """Create a VLLM model instance.

    Parameters
    ----------
    model
        A `vllm.LLM` model instance.

    """
    self.model = model
    self.type_adapter = VLLMOfflineTypeAdapter()
    self.lora_request = None # v0 legacy, to be removed

generate(model_input, output_type=None, **inference_kwargs)

Generate text using vLLM.

Parameters:

Name Type Description Default
model_input Union[str, List[str]]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The logits processor the model will use to constrain the format of the generated text.

None
inference_kwargs Any

Additional keyword arguments to pass to the generate method in the vllm.LLM model.

{}

Returns:

Type Description
Union[str, List[str], List[List[str]]]

The text generated by the model.

Source code in outlines/models/vllm_offline.py
def generate(
    self,
    model_input: Union[str, List[str]],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str], List[List[str]]]:
    """Generate text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[str, List[str], List[List[str]]]
        The text generated by the model.

    """
    from vllm.sampling_params import GuidedDecodingParams, SamplingParams

    sampling_params = inference_kwargs.pop("sampling_params", None)

    if sampling_params is None:
        sampling_params = SamplingParams()

    output_type_args = self.type_adapter.format_output_type(output_type)
    if output_type_args:
        sampling_params.guided_decoding = GuidedDecodingParams(**output_type_args)

    results = self.model.generate(
        self.type_adapter.format_input(model_input),
        sampling_params=sampling_params,
        lora_request=self.lora_request, # v0 legacy, to be removed
        **inference_kwargs,
    )
    results = [[sample.text for sample in batch.outputs] for batch in results]

    batch_size = len(results)
    sample_size = len(results[0])

    if batch_size == 1 and sample_size == 1:
        return results[0][0]
    elif batch_size == 1:
        return results[0]
    elif sample_size == 1:
        return [batch[0] for batch in results]

    return results

generate_stream(model_input, output_type, **inference_kwargs)

Not available for vllm.LLM.

TODO: Implement the streaming functionality ourselves.

Source code in outlines/models/vllm_offline.py
def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Not available for `vllm.LLM`.

    TODO: Implement the streaming functionality ourselves.

    """
    raise NotImplementedError(
        "Streaming is not available for the vLLM integration."
    )

load_lora(adapter_path)

Load a LoRA adapter. Deprecated since v1.0.0.

Use the lora_request argument when calling the model or generator instead.

Source code in outlines/models/vllm_offline.py
def load_lora(self, adapter_path: Optional[str]) -> None:
    """Load a LoRA adapter. Deprecated since v1.0.0.

    Use the `lora_request` argument when calling the model or generator
    instead.

    """
    warnings.warn("""
        The `load_lora` method is deprecated starting from v1.0.0.
        Support for it will be removed in v1.1.0.
        Please use v1 of the `outlines` library by using the
        `outlines.from_vllm` function to create a `VLLM` model
        instance.
        In v1, you must pass the `lora_request` argument as
        a keyword argument when calling the model or generator.
        """)

    from vllm.lora.request import LoRARequest

    if adapter_path is None:
        self.lora_request = None
    else:
        self.lora_request = LoRARequest(adapter_path, 1, adapter_path)

VLLMOfflineTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the VLLMOffline model.

Source code in outlines/models/vllm_offline.py
class VLLMOfflineTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `VLLMOffline` model."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input passed by the user.

        """
        raise NotImplementedError(
            f"The input type {input} is not available. "
            "Please use a string or a list of strings."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        return model_input

    @format_input.register(list)
    def format_list_input(self, model_input: List[str]) -> List[str]:
        return model_input

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the model.

        For vLLM, the structured output definition is set in the
        `GuidedDecodingParams` constructor that is provided as a value to the
        `guided_decoding` parameter of the `SamplingParams` constructor, itself
        provided as a value to the `sampling_params` parameter of the `generate`
        method.

        Parameters
        ----------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The arguments to provide to the `GuidedDecodingParams` constructor.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            return {"grammar": term.definition}
        elif isinstance(term, JsonSchema):
            guided_decoding_params = {"json": json.loads(term.schema)}
            if term.whitespace_pattern:
                guided_decoding_params["whitespace_pattern"] = term.whitespace_pattern
            return guided_decoding_params
        else:
            return {"regex": to_regex(term)}

format_input(model_input)

Generate the prompt argument to pass to the model.

Parameters:

model_input The input passed by the user.

Source code in outlines/models/vllm_offline.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input passed by the user.

    """
    raise NotImplementedError(
        f"The input type {input} is not available. "
        "Please use a string or a list of strings."
    )

format_output_type(output_type=None)

Generate the structured output argument to pass to the model.

For vLLM, the structured output definition is set in the GuidedDecodingParams constructor that is provided as a value to the guided_decoding parameter of the SamplingParams constructor, itself provided as a value to the sampling_params parameter of the generate method.

Parameters:

Name Type Description Default
output_type Optional[Any]

The structured output type provided.

None

Returns:

Type Description
dict

The arguments to provide to the GuidedDecodingParams constructor.

Source code in outlines/models/vllm_offline.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the model.

    For vLLM, the structured output definition is set in the
    `GuidedDecodingParams` constructor that is provided as a value to the
    `guided_decoding` parameter of the `SamplingParams` constructor, itself
    provided as a value to the `sampling_params` parameter of the `generate`
    method.

    Parameters
    ----------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The arguments to provide to the `GuidedDecodingParams` constructor.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        return {"grammar": term.definition}
    elif isinstance(term, JsonSchema):
        guided_decoding_params = {"json": json.loads(term.schema)}
        if term.whitespace_pattern:
            guided_decoding_params["whitespace_pattern"] = term.whitespace_pattern
        return guided_decoding_params
    else:
        return {"regex": to_regex(term)}

from_vllm_offline(model)

Create an Outlines VLLMOffline model instance from a vllm.LLM instance.

Parameters:

Name Type Description Default
model LLM

A vllm.LLM instance.

required

Returns:

Type Description
VLLMOffline

An Outlines VLLMOffline model instance.

Source code in outlines/models/vllm_offline.py
def from_vllm_offline(model: "LLM") -> VLLMOffline:
    """Create an Outlines `VLLMOffline` model instance from a `vllm.LLM`
    instance.

    Parameters
    ----------
    model
        A `vllm.LLM` instance.

    Returns
    -------
    VLLMOffline
        An Outlines `VLLMOffline` model instance.

    """
    return VLLMOffline(model)
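
An end-to-end structured output sketch built on this factory, assuming it is exposed as `outlines.from_vllm_offline`; the checkpoint name is a placeholder and the printed JSON is illustrative:

import outlines
from pydantic import BaseModel
from vllm import LLM
from vllm.sampling_params import SamplingParams

class Character(BaseModel):
    name: str
    age: int

model = outlines.from_vllm_offline(LLM("Qwen/Qwen2.5-0.5B-Instruct"))

# The Pydantic model becomes the `json` argument of `GuidedDecodingParams`.
result = model.generate(
    "Create a character as a JSON object.",
    Character,
    sampling_params=SamplingParams(max_tokens=64),
)
print(result)  # e.g. '{"name": "Ada", "age": 36}'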