
models

Module that contains all the models integrated in outlines.

We group the models into submodules by provider rather than by theme (completion, chat completion, diffusers, etc.) and use routing functions everywhere else in the codebase.
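
For example, the anthropic submodule documented below exposes a from_anthropic function that wraps the provider's native client. A minimal sketch (the model name is illustrative and an ANTHROPIC_API_KEY is assumed to be set in the environment):

import anthropic

from outlines.models.anthropic import from_anthropic

client = anthropic.Anthropic()
model = from_anthropic(client, "claude-3-5-haiku-latest")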

anthropic

Integration with Anthropic's API.

Anthropic

Bases: Model

Thin wrapper around the anthropic.Anthropic client.

This wrapper converts the higher-level input and output types specified by the user into arguments for the anthropic.Anthropic client.
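
A minimal usage sketch (the model name and token limit are illustrative; max_tokens is forwarded to messages.create, which Anthropic's Messages API requires):

import anthropic

from outlines.models.anthropic import Anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
model = Anthropic(client, "claude-3-5-haiku-latest")

answer = model("Give me one sentence about the sea.", max_tokens=128)
print(answer)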

Source code in outlines/models/anthropic.py
class Anthropic(Model):
    """Thin wrapper around the `anthropic.Anthropic` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `anthropic.Anthropic` client.

    """
    def __init__(
        self, client: "AnthropicClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            An `anthropic.Anthropic` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = AnthropicTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        completion = self.client.messages.create(
            **messages,
            **inference_kwargs,
        )
        return completion.content[0].text

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Anthropic does not support batch generation."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        stream = self.client.messages.create(
            **messages,
            stream=True,
            **inference_kwargs,
        )

        for chunk in stream:
            if (
                chunk.type == "content_block_delta"
                and chunk.delta.type == "text_delta"
            ):
                yield chunk.delta.text

__init__(client, model_name=None)

Parameters:

    client (Anthropic): An anthropic.Anthropic client. Required.
    model_name (Optional[str]): The name of the model to use. Default: None.
Source code in outlines/models/anthropic.py
def __init__(
    self, client: "AnthropicClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client
        An `anthropic.Anthropic` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = AnthropicTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using Anthropic.

Parameters:

    model_input (Union[Chat, list, str]): The prompt based on which the model will generate a response. Required.
    output_type (Optional[Any]): As structured generation is not supported by Anthropic, the value of this argument must be None. Otherwise, an error will be raised at runtime. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the client. Default: {}.

Returns:

    str: The response generated by the model.

Source code in outlines/models/anthropic.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if (
        "model" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model"] = self.model_name

    completion = self.client.messages.create(
        **messages,
        **inference_kwargs,
    )
    return completion.content[0].text

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using Anthropic.

Parameters:

    model_input (Union[Chat, list, str]): The prompt based on which the model will generate a response. Required.
    output_type (Optional[Any]): As structured generation is not supported by Anthropic, the value of this argument must be None. Otherwise, an error will be raised at runtime. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the client. Default: {}.

Returns:

    Iterator[str]: An iterator that yields the text generated by the model.

Source code in outlines/models/anthropic.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if (
        "model" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model"] = self.model_name

    stream = self.client.messages.create(
        **messages,
        stream=True,
        **inference_kwargs,
    )

    for chunk in stream:
        if (
            chunk.type == "content_block_delta"
            and chunk.delta.type == "text_delta"
        ):
            yield chunk.delta.text
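
Consumed through the model's stream method, the generator above yields text chunks as they arrive. A minimal sketch (model name, prompt, and token limit are illustrative):

import anthropic

from outlines.models.anthropic import Anthropic

model = Anthropic(anthropic.Anthropic(), "claude-3-5-haiku-latest")

# Chunks are printed as they are received from the streaming response.
for chunk in model.stream("Describe a sunset in two sentences.", max_tokens=256):
    print(chunk, end="")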

AnthropicTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Anthropic model.

AnthropicTypeAdapter is responsible for preparing the arguments to Anthropic's messages.create method: the input (the prompt and, possibly, images). Anthropic does not support defining the output type, so format_output_type is not implemented.
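
As a concrete illustration of format_input, derived from the source below, a plain string prompt becomes a single user message:

from outlines.models.anthropic import AnthropicTypeAdapter

adapter = AnthropicTypeAdapter()
print(adapter.format_input("Hello"))
# {'messages': [{'role': 'user', 'content': 'Hello'}]}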

Source code in outlines/models/anthropic.py
class AnthropicTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Anthropic` model.

    `AnthropicTypeAdapter` is responsible for preparing the arguments to
    Anthropic's `messages.create` method: the input (prompt and possibly
    image).
    Anthropic does not support defining the output type, so
    `format_output_type` is not implemented.

    """

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the `messages` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        dict
            The `messages` argument to pass to the client.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "Anthropic. The only available types are `str`, `list` and `Chat` "
            "(containing a prompt and images)."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> dict:
        return {
            "messages": [self._create_message("user", model_input)]
        }

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> dict:
        return {
            "messages": [
                self._create_message("user", model_input)
            ]
        }

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> dict:
        """Generate the `messages` argument to pass to the client when the user
        passes a Chat instance.

        """
        return {
            "messages": [
                self._create_message(message["role"], message["content"])
                for message in model_input.messages
            ]
        }

    def _create_message(self, role: str, content: str | list) -> dict:
        """Create a message."""

        if isinstance(content, str):
            return {
                "role": role,
                "content": content,
            }

        elif isinstance(content, list):
            prompt = content[0]
            images = content[1:]

            if not all(isinstance(image, Image) for image in images):
                raise ValueError("All assets provided must be of type Image")

            image_content_messages = [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image.image_format,
                        "data": image.image_str,
                    },
                }
                for image in images
            ]

            return {
                "role": role,
                "content": [
                    *image_content_messages,
                    {"type": "text", "text": prompt},
                ],
            }

        else:
            raise ValueError(
                f"Invalid content type: {type(content)}. "
                "The content must be a string or a list containing a string "
                "and a list of images."
            )

    def format_output_type(self, output_type):
        """Not implemented for Anthropic."""
        if output_type is None:
            return {}
        else:
            raise NotImplementedError(
                f"The output type {output_type} is not available with "
                "Anthropic."
            )

format_chat_model_input(model_input)

Generate the messages argument to pass to the client when the user passes a Chat instance.

Source code in outlines/models/anthropic.py
@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> dict:
    """Generate the `messages` argument to pass to the client when the user
    passes a Chat instance.

    """
    return {
        "messages": [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]
    }

format_input(model_input)

Generate the messages argument to pass to the client.

Parameters:

    model_input: The input provided by the user. Required.

Returns:

    dict: The messages argument to pass to the client.

Source code in outlines/models/anthropic.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the `messages` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    dict
        The `messages` argument to pass to the client.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "Anthropic. The only available types are `str`, `list` and `Chat` "
        "(containing a prompt and images)."
    )

format_output_type(output_type)

Not implemented for Anthropic.

Source code in outlines/models/anthropic.py
def format_output_type(self, output_type):
    """Not implemented for Anthropic."""
    if output_type is None:
        return {}
    else:
        raise NotImplementedError(
            f"The output type {output_type} is not available with "
            "Anthropic."
        )

from_anthropic(client, model_name=None)

Create an Outlines Anthropic model instance from an anthropic.Anthropic client instance.

Parameters:

    client (Anthropic): An anthropic.Anthropic client instance. Required.
    model_name (Optional[str]): The name of the model to use. Default: None.

Returns:

    Anthropic: An Outlines Anthropic model instance.

Source code in outlines/models/anthropic.py
def from_anthropic(
    client: "AnthropicClient", model_name: Optional[str] = None
) -> Anthropic:
    """Create an Outlines `Anthropic` model instance from an
    `anthropic.Anthropic` client instance.

    Parameters
    ----------
    client
        An `anthropic.Anthropic` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Anthropic
        An Outlines `Anthropic` model instance.

    """
    return Anthropic(client, model_name)

base

Base classes for all models and model type adapters.

AsyncModel

Bases: ABC

Base class for all asynchronous models.

This class defines shared __call__, batch and stream methods that can be used to call the model directly. The generate, generate_batch, and generate_stream methods must be implemented by the subclasses. All models inheriting from this class must define a type_adapter attribute of type ModelTypeAdapter. The methods of the type_adapter attribute are used in the generate, generate_batch, and generate_stream methods to format the input and output types received by the model. Additionally, steerable models must define a tensor_library_name attribute.
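
The sketch below is a toy, self-contained illustration of this contract. EchoAsyncModel and EchoTypeAdapter are hypothetical names, and generate is called directly only to keep the example short; end users would normally go through __call__, batch, or stream as described above.

import asyncio
from typing import Any, Optional

from outlines.models.base import AsyncModel, ModelTypeAdapter


class EchoTypeAdapter(ModelTypeAdapter):
    def format_input(self, model_input: Any) -> Any:
        return str(model_input)

    def format_output_type(self, output_type: Optional[Any] = None) -> Any:
        return None  # this toy adapter supports no structured output


class EchoAsyncModel(AsyncModel):
    def __init__(self):
        self.type_adapter = EchoTypeAdapter()

    async def generate(self, model_input, output_type=None, **inference_kwargs):
        # A real model would call an inference client here.
        return self.type_adapter.format_input(model_input)

    async def generate_batch(self, model_input, output_type=None, **inference_kwargs):
        return [await self.generate(item, output_type) for item in model_input]

    async def generate_stream(self, model_input, output_type=None, **inference_kwargs):
        yield await self.generate(model_input, output_type)


async def main():
    model = EchoAsyncModel()
    print(await model.generate("hello"))

asyncio.run(main())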

Source code in outlines/models/base.py
class AsyncModel(ABC):
    """Base class for all asynchronous models.

    This class defines shared `__call__`, `batch` and `stream` methods that can
    be used to call the model directly. The `generate`, `generate_batch`, and
    `generate_stream` methods must be implemented by the subclasses.
    All models inheriting from this class must define a `type_adapter`
    attribute of type `ModelTypeAdapter`. The methods of the `type_adapter`
    attribute are used in the `generate`, `generate_batch`, and
    `generate_stream` methods to format the input and output types received by
    the model.
    Additionally, steerable models must define a `tensor_library_name`
    attribute.

    """
    type_adapter: ModelTypeAdapter
    tensor_library_name: str

    async def __call__(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Call the model.

        Users can call the model directly, in which case we will create a
        generator instance with the output type provided and call it.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        await generator("prompt")
        ```
        and
        ```python
        await model("prompt", Foo)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type)
        return await generator(model_input, **inference_kwargs)

    async def batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Make a batch call to the model (several inputs at once).

        Users can use the `batch` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `batch` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        await generator.batch(["prompt1", "prompt2"])
        ```
        and
        ```python
        await model.batch(["prompt1", "prompt2"], Foo)
        ```

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type)
        return await generator.batch(model_input, **inference_kwargs) # type: ignore

    async def stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> AsyncIterator[Any]:
        """Stream a response from the model.

        Users can use the `stream` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `stream` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        async for chunk in generator.stream("prompt"):
            print(chunk)
        ```
        and
        ```python
        async for chunk in model.stream("prompt", Foo):
            print(chunk)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        AsyncIterator[Any]
            A stream of responses from the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type)

        async for chunk in generator.stream(model_input, **inference_kwargs):  # type: ignore
            yield chunk

    @abstractmethod
    async def generate(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Generate a response from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        ...

    @abstractmethod
    async def generate_batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Generate a batch of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        ...

    @abstractmethod
    async def generate_stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> AsyncIterator[Any]:
        """Generate a stream of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        AsyncIterator[Any]
            A coroutine that will produce an async iterator of responses from the model.

        """
        ...

__call__(model_input, output_type=None, **inference_kwargs) async

Call the model.

Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent:

generator = Generator(model, Foo)
await generator("prompt")

and

await model("prompt", Foo)

Parameters:

    model_input (Any): The input provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    Any: The response generated by the model.

Source code in outlines/models/base.py
async def __call__(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Call the model.

    Users can call the model directly, in which case we will create a
    generator instance with the output type provided and call it.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    await generator("prompt")
    ```
    and
    ```python
    await model("prompt", Foo)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type)
    return await generator(model_input, **inference_kwargs)

batch(model_input, output_type=None, **inference_kwargs) async

Make a batch call to the model (several inputs at once).

Users can use the batch method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its batch method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
await generator.batch(["prompt1", "prompt2"])

and

await model.batch(["prompt1", "prompt2"], Foo)

Parameters:

    model_input (List[Any]): The list of inputs provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    List[Any]: The list of responses generated by the model.

Source code in outlines/models/base.py
async def batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Make a batch call to the model (several inputs at once).

    Users can use the `batch` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `batch` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    await generator.batch(["prompt1", "prompt2"])
    ```
    and
    ```python
    await model.batch(["prompt1", "prompt2"], Foo)
    ```

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type)
    return await generator.batch(model_input, **inference_kwargs) # type: ignore

generate(model_input, output_type=None, **inference_kwargs) abstractmethod async

Generate a response from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

    model_input (Any): The input provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    Any: The response generated by the model.

Source code in outlines/models/base.py
@abstractmethod
async def generate(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Generate a response from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    ...

generate_batch(model_input, output_type=None, **inference_kwargs) abstractmethod async

Generate a batch of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

    model_input (List[Any]): The list of inputs provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    List[Any]: The list of responses generated by the model.

Source code in outlines/models/base.py
@abstractmethod
async def generate_batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Generate a batch of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    ...

generate_stream(model_input, output_type=None, **inference_kwargs) abstractmethod async

Generate a stream of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

    model_input (Any): The input provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    AsyncIterator[Any]: A coroutine that will produce an async iterator of responses from the model.

Source code in outlines/models/base.py
@abstractmethod
async def generate_stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> AsyncIterator[Any]:
    """Generate a stream of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    AsyncIterator[Any]
        A coroutine that will produce an async iterator of responses from the model.

    """
    ...

stream(model_input, output_type=None, **inference_kwargs) async

Stream a response from the model.

Users can use the stream method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its stream method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
async for chunk in generator.stream("prompt"):
    print(chunk)

and

async for chunk in model.stream("prompt", Foo):
    print(chunk)

Parameters:

    model_input (Any): The input provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    AsyncIterator[Any]: A stream of responses from the model.

Source code in outlines/models/base.py
async def stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> AsyncIterator[Any]:
    """Stream a response from the model.

    Users can use the `stream` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `stream` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    async for chunk in generator.stream("prompt"):
        print(chunk)
    ```
    and
    ```python
    async for chunk in model.stream("prompt", Foo):
        print(chunk)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    AsyncIterator[Any]
        A stream of responses from the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type)

    async for chunk in generator.stream(model_input, **inference_kwargs):  # type: ignore
        yield chunk

Model

Bases: ABC

Base class for all synchronous models.

This class defines shared __call__, batch and stream methods that can be used to call the model directly. The generate, generate_batch, and generate_stream methods must be implemented by the subclasses. All models inheriting from this class must define a type_adapter attribute of type ModelTypeAdapter. The methods of the type_adapter attribute are used in the generate, generate_batch, and generate_stream methods to format the input and output types received by the model. Additionally, steerable models must define a tensor_library_name attribute.

Source code in outlines/models/base.py
class Model(ABC):
    """Base class for all synchronous models.

    This class defines shared `__call__`, `batch` and `stream` methods that can
    be used to call the model directly. The `generate`, `generate_batch`, and
    `generate_stream` methods must be implemented by the subclasses.
    All models inheriting from this class must define a `type_adapter`
    attribute of type `ModelTypeAdapter`. The methods of the `type_adapter`
    attribute are used in the `generate`, `generate_batch`, and
    `generate_stream` methods to format the input and output types received by
    the model.
    Additionally, steerable models must define a `tensor_library_name`
    attribute.

    """
    type_adapter: ModelTypeAdapter
    tensor_library_name: str

    def __call__(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Call the model.

        Users can call the model directly, in which case we will create a
        generator instance with the output type provided and call it.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        generator("prompt")
        ```
        and
        ```python
        model("prompt", Foo)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        from outlines import Generator

        return Generator(self, output_type)(model_input, **inference_kwargs)

    def batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Make a batch call to the model (several inputs at once).

        Users can use the `batch` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `batch` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        generator.batch(["prompt1", "prompt2"])
        ```
        and
        ```python
        model.batch(["prompt1", "prompt2"], Foo)
        ```

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type)
        return generator.batch(model_input, **inference_kwargs) # type: ignore

    def stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Stream a response from the model.

        Users can use the `stream` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `stream` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        for chunk in generator.stream("prompt"):
            print(chunk)
        ```
        and
        ```python
        for chunk in model.stream("prompt", Foo):
            print(chunk)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type)
        return generator.stream(model_input, **inference_kwargs) # type: ignore

    @abstractmethod
    def generate(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Generate a response from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        ...

    @abstractmethod
    def generate_batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Generate a batch of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        ...
    @abstractmethod
    def generate_stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Generate a stream of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        ...

__call__(model_input, output_type=None, **inference_kwargs)

Call the model.

Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent:

generator = Generator(model, Foo)
generator("prompt")

and

model("prompt", Foo)

Parameters:

    model_input (Any): The input provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    Any: The response generated by the model.

Source code in outlines/models/base.py
def __call__(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Call the model.

    Users can call the model directly, in which case we will create a
    generator instance with the output type provided and call it.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    generator("prompt")
    ```
    and
    ```python
    model("prompt", Foo)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    from outlines import Generator

    return Generator(self, output_type)(model_input, **inference_kwargs)

batch(model_input, output_type=None, **inference_kwargs)

Make a batch call to the model (several inputs at once).

Users can use the batch method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its batch method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
generator.batch(["prompt1", "prompt2"])

and

model.batch(["prompt1", "prompt2"], Foo)

Parameters:

    model_input (List[Any]): The list of inputs provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    List[Any]: The list of responses generated by the model.

Source code in outlines/models/base.py
def batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Make a batch call to the model (several inputs at once).

    Users can use the `batch` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `batch` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    generator.batch(["prompt1", "prompt2"])
    ```
    and
    ```python
    model.batch(["prompt1", "prompt2"], Foo)
    ```

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type)
    return generator.batch(model_input, **inference_kwargs) # type: ignore

generate(model_input, output_type=None, **inference_kwargs) abstractmethod

Generate a response from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

    model_input (Any): The input provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    Any: The response generated by the model.

Source code in outlines/models/base.py
@abstractmethod
def generate(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Generate a response from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    ...

generate_batch(model_input, output_type=None, **inference_kwargs) abstractmethod

Generate a batch of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

    model_input (List[Any]): The list of inputs provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    List[Any]: The list of responses generated by the model.

Source code in outlines/models/base.py
@abstractmethod
def generate_batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Generate a batch of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    ...

generate_stream(model_input, output_type=None, **inference_kwargs) abstractmethod

Generate a stream of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

    model_input (Any): The input provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    Iterator[Any]: A stream of responses from the model.

Source code in outlines/models/base.py
@abstractmethod
def generate_stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Generate a stream of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    ...

stream(model_input, output_type=None, **inference_kwargs)

Stream a response from the model.

Users can use the stream method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its stream method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
for chunk in generator.stream("prompt"):
    print(chunk)

and

for chunk in model.stream("prompt", Foo):
    print(chunk)

Parameters:

    model_input (Any): The input provided by the user. Required.
    output_type (Optional[Any]): The output type provided by the user. Default: None.
    **inference_kwargs (Any): Additional keyword arguments to pass to the model. Default: {}.

Returns:

    Iterator[Any]: A stream of responses from the model.

Source code in outlines/models/base.py
def stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Stream a response from the model.

    Users can use the `stream` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `stream` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    for chunk in generator.stream("prompt"):
        print(chunk)
    ```
    and
    ```python
    for chunk in model.stream("prompt", Foo):
        print(chunk)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type)
    return generator.stream(model_input, **inference_kwargs) # type: ignore

ModelTypeAdapter

Bases: ABC

Base class for all model type adapters.

A type adapter instance must be given as a value to the type_adapter attribute when instantiating a model. The type adapter is responsible for formatting the input and output types passed to the model to match the specific format expected by the associated model.
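
A minimal sketch of what implementing this interface looks like; UppercaseAdapter is a hypothetical adapter that accepts only string inputs and refuses structured output types:

from typing import Any, Optional

from outlines.models.base import ModelTypeAdapter


class UppercaseAdapter(ModelTypeAdapter):
    def format_input(self, model_input: Any) -> Any:
        # Validate the input type and normalize it for the model.
        if not isinstance(model_input, str):
            raise TypeError("This adapter only accepts string prompts.")
        return model_input.upper()

    def format_output_type(self, output_type: Optional[Any] = None) -> Any:
        # No structured output support in this toy adapter.
        if output_type is not None:
            raise NotImplementedError("Structured outputs are not supported here.")
        return {}


adapter = UppercaseAdapter()
print(adapter.format_input("hello"))  # HELLO
print(adapter.format_output_type())   # {}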

Source code in outlines/models/base.py
class ModelTypeAdapter(ABC):
    """Base class for all model type adapters.

    A type adapter instance must be given as a value to the `type_adapter`
    attribute when instantiating a model.
    The type adapter is responsible for formatting the input and output types
    passed to the model to match the specific format expected by the
    associated model.

    """

    @abstractmethod
    def format_input(self, model_input: Any) -> Any:
        """Format the user input to the expected format of the model.

        For API-based models, it typically means creating the `messages`
        argument passed to the client. For local models, it can mean casting
        the input from str to list for instance.
        This method is also used to validate that the input type provided by
        the user is supported by the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        Any
            The formatted input to be passed to the model.

        """
        ...

    @abstractmethod
    def format_output_type(self, output_type: Optional[Any] = None) -> Any:
        """Format the output type to the expected format of the model.

        For black-box models, this typically means creating a `response_format`
        argument. For steerable models, it means formatting the logits processor
        to create the object type expected by the model.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        Any
            The formatted output type to be passed to the model.

        """
        ...

format_input(model_input) abstractmethod

Format the user input to the expected format of the model.

For API-based models, it typically means creating the messages argument passed to the client. For local models, it can mean casting the input from str to list for instance. This method is also used to validate that the input type provided by the user is supported by the model.

Parameters:

    model_input (Any): The input provided by the user. Required.

Returns:

    Any: The formatted input to be passed to the model.

Source code in outlines/models/base.py
@abstractmethod
def format_input(self, model_input: Any) -> Any:
    """Format the user input to the expected format of the model.

    For API-based models, it typically means creating the `messages`
    argument passed to the client. For local models, it can mean casting
    the input from str to list for instance.
    This method is also used to validate that the input type provided by
    the user is supported by the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    Any
        The formatted input to be passed to the model.

    """
    ...

format_output_type(output_type=None) abstractmethod

Format the output type to the expected format of the model.

For black-box models, this typically means creating a response_format argument. For steerable models, it means formatting the logits processor to create the object type expected by the model.

Parameters:

    output_type (Optional[Any]): The output type provided by the user. Default: None.

Returns:

    Any: The formatted output type to be passed to the model.

Source code in outlines/models/base.py
@abstractmethod
def format_output_type(self, output_type: Optional[Any] = None) -> Any:
    """Format the output type to the expected format of the model.

    For black-box models, this typically means creating a `response_format`
    argument. For steerable models, it means formatting the logits processor
    to create the object type expected by the model.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    Any
        The formatted output type to be passed to the model.

    """
    ...

dottxt

Integration with Dottxt's API.

Dottxt

Bases: Model

Thin wrapper around the dottxt.client.Dottxt client.

This wrapper converts the higher-level input and output types specified by the user into arguments for the dottxt.client.Dottxt client.
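
A hedged usage sketch, assuming the dottxt package is installed and the client is configured with valid credentials; the schema and prompt are illustrative, and a model_name can be passed if needed:

from pydantic import BaseModel

from dottxt.client import Dottxt as DottxtClient
from outlines.models.dottxt import Dottxt


class Character(BaseModel):
    name: str
    age: int


client = DottxtClient()  # assumes credentials are configured for the client
model = Dottxt(client)

# Dottxt performs JSON-constrained generation, so an output type convertible
# to a JSON schema (here a Pydantic model) is expected.
json_string = model("Create a fantasy character.", Character)
print(json_string)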

Source code in outlines/models/dottxt.py
class Dottxt(Model):
    """Thin wrapper around the `dottxt.client.Dottxt` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `dottxt.client.Dottxt` client.

    """

    def __init__(
        self,
        client: "DottxtClient",
        model_name: Optional[str] = None,
        model_revision: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            A `dottxt.Dottxt` client.
        model_name
            The name of the model to use.
        model_revision
            The revision of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.model_revision = model_revision
        self.type_adapter = DottxtTypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Dottxt.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        prompt = self.type_adapter.format_input(model_input)
        json_schema = self.type_adapter.format_output_type(output_type)

        if (
            "model_name" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model_name"] = self.model_name

        if (
            "model_revision" not in inference_kwargs
            and self.model_revision is not None
        ):
            inference_kwargs["model_revision"] = self.model_revision

        completion = self.client.json(
            prompt,
            json_schema,
            **inference_kwargs,
        )
        return completion.data

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Dottxt does not support batch generation."
        )

    def generate_stream(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        """Not available for Dottxt."""
        raise NotImplementedError(
            "Dottxt does not support streaming. Call the model/generator for "
            + "regular generation instead."
        )

__init__(client, model_name=None, model_revision=None)

Parameters:

- client (Dottxt): A dottxt.Dottxt client. Required.
- model_name (Optional[str]): The name of the model to use. Default: None.
- model_revision (Optional[str]): The revision of the model to use. Default: None.
Source code in outlines/models/dottxt.py
def __init__(
    self,
    client: "DottxtClient",
    model_name: Optional[str] = None,
    model_revision: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        A `dottxt.Dottxt` client.
    model_name
        The name of the model to use.
    model_revision
        The revision of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.model_revision = model_revision
    self.type_adapter = DottxtTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using Dottxt.

Parameters:

- model_input (str): The prompt based on which the model will generate a response. Required.
- output_type (Optional[Any]): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. Default: None.
- **inference_kwargs (Any): Additional keyword arguments to pass to the client. Default: {}.

Returns:

- str: The text generated by the model.

Source code in outlines/models/dottxt.py
def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Dottxt.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    prompt = self.type_adapter.format_input(model_input)
    json_schema = self.type_adapter.format_output_type(output_type)

    if (
        "model_name" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model_name"] = self.model_name

    if (
        "model_revision" not in inference_kwargs
        and self.model_revision is not None
    ):
        inference_kwargs["model_revision"] = self.model_revision

    completion = self.client.json(
        prompt,
        json_schema,
        **inference_kwargs,
    )
    return completion.data

generate_stream(model_input, output_type=None, **inference_kwargs)

Not available for Dottxt.

Source code in outlines/models/dottxt.py
def generate_stream(
    self,
    model_input,
    output_type=None,
    **inference_kwargs,
):
    """Not available for Dottxt."""
    raise NotImplementedError(
        "Dottxt does not support streaming. Call the model/generator for "
        + "regular generation instead."
    )

DottxtTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Dottxt model.

Source code in outlines/models/dottxt.py
class DottxtTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Dottxt` model."""

    def format_input(self, model_input: str) -> str:
        """Format the prompt to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The input to pass to the client.

        """
        if isinstance(model_input, str):
            return model_input
        raise TypeError(
            f"The input type {model_input} is not available with Dottxt. "
            "The only available type is `str`."
        )

    def format_output_type(self, output_type: Optional[Any] = None) -> str:
        """Format the output type to pass to the client.

        TODO: `int`, `float` and other Python types could be supported via
        JSON Schema.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        str
            The output type to pass to the client.

        """
        # Unsupported output types
        if output_type is None:
            raise TypeError(
                "You must provide an output type. Dottxt only supports "
                "constrained generation."
            )
        elif isinstance(output_type, Regex):
            raise TypeError(
                "Regex-based structured outputs will soon be available with "
                "Dottxt. Use an open source model in the meantime."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs will soon be available with "
                "Dottxt. Use an open source model in the meantime."
            )

        elif isinstance(output_type, JsonSchema):
            return output_type.schema
        elif is_dataclass(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return json.dumps(schema)
        elif is_typed_dict(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return json.dumps(schema)
        elif is_pydantic_model(output_type):
            schema = output_type.model_json_schema()
            return json.dumps(schema)
        elif is_genson_schema_builder(output_type):
            return output_type.to_json()
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by Dottxt. "
                "Consider using a local mode instead."
            )

format_input(model_input)

Format the prompt to pass to the client.

Parameters:

- model_input (str): The input provided by the user. Required.

Returns:

- str: The input to pass to the client.

Source code in outlines/models/dottxt.py
def format_input(self, model_input: str) -> str:
    """Format the prompt to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The input to pass to the client.

    """
    if isinstance(model_input, str):
        return model_input
    raise TypeError(
        f"The input type {model_input} is not available with Dottxt. "
        "The only available type is `str`."
    )

format_output_type(output_type=None)

Format the output type to pass to the client.

TODO: int, float and other Python types could be supported via JSON Schema.

Parameters:

- output_type (Optional[Any]): The output type provided by the user. Default: None.

Returns:

- str: The output type to pass to the client.

Source code in outlines/models/dottxt.py
def format_output_type(self, output_type: Optional[Any] = None) -> str:
    """Format the output type to pass to the client.

    TODO: `int`, `float` and other Python types could be supported via
    JSON Schema.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    str
        The output type to pass to the client.

    """
    # Unsupported output types
    if output_type is None:
        raise TypeError(
            "You must provide an output type. Dottxt only supports "
            "constrained generation."
        )
    elif isinstance(output_type, Regex):
        raise TypeError(
            "Regex-based structured outputs will soon be available with "
            "Dottxt. Use an open source model in the meantime."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs will soon be available with "
            "Dottxt. Use an open source model in the meantime."
        )

    elif isinstance(output_type, JsonSchema):
        return output_type.schema
    elif is_dataclass(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return json.dumps(schema)
    elif is_typed_dict(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return json.dumps(schema)
    elif is_pydantic_model(output_type):
        schema = output_type.model_json_schema()
        return json.dumps(schema)
    elif is_genson_schema_builder(output_type):
        return output_type.to_json()
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by Dottxt. "
            "Consider using a local mode instead."
        )
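
For instance, the adapter turns a Pydantic model into the JSON Schema string that the Dottxt client expects. A small sketch, assuming outlines and pydantic are installed (importing the Dottxt integration may additionally require the optional dottxt dependency):

import json

from pydantic import BaseModel

from outlines.models.dottxt import DottxtTypeAdapter


class Character(BaseModel):
    name: str
    age: int


adapter = DottxtTypeAdapter()
schema_string = adapter.format_output_type(Character)

# Same result as serializing the model's schema directly.
assert schema_string == json.dumps(Character.model_json_schema())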

from_dottxt(client, model_name=None, model_revision=None)

Create an Outlines Dottxt model instance from a dottxt.Dottxt client instance.

Parameters:

- client (Dottxt): A dottxt.Dottxt client instance. Required.
- model_name (Optional[str]): The name of the model to use. Default: None.
- model_revision (Optional[str]): The revision of the model to use. Default: None.

Returns:

- Dottxt: An Outlines Dottxt model instance.

Source code in outlines/models/dottxt.py
def from_dottxt(
    client: "DottxtClient",
    model_name: Optional[str] = None,
    model_revision: Optional[str] = None,
) -> Dottxt:
    """Create an Outlines `Dottxt` model instance from a `dottxt.Dottxt`
    client instance.

    Parameters
    ----------
    client
        A `dottxt.Dottxt` client instance.
    model_name
        The name of the model to use.
    model_revision
        The revision of the model to use.

    Returns
    -------
    Dottxt
        An Outlines `Dottxt` model instance.

    """
    return Dottxt(client, model_name, model_revision)
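
Putting the pieces together, a typical workflow creates a dottxt.Dottxt client, wraps it with from_dottxt, and calls generate with a JSON-schema-compatible output type. This is a hedged sketch: the dottxt package must be installed separately, and authentication setup is elided:

from dottxt.client import Dottxt
from pydantic import BaseModel

from outlines.models.dottxt import from_dottxt


class Character(BaseModel):
    name: str
    age: int


client = Dottxt()  # placeholder: configure authentication as described in the dottxt docs
model = from_dottxt(client)

# `generate` returns a JSON string conforming to the Character schema.
json_text = model.generate("Create a character for a fantasy novel.", Character)
character = Character.model_validate_json(json_text)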

gemini

Integration with Gemini's API.

Gemini

Bases: Model

Thin wrapper around the google.genai.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the google.genai.Client client.

Source code in outlines/models/gemini.py
class Gemini(Model):
    """Thin wrapper around the `google.genai.Client` client.

    This wrapper is used to convert the input and output types specified by
    the users at a higher level to arguments to the `google.genai.Client`
    client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            A `google.genai.Client` instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = GeminiTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> str:
        """Generate a response from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        completion = self.client.models.generate_content(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs}
        )

        return completion.text

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Gemini does not support batch generation."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Generate a stream of responses from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        stream = self.client.models.generate_content_stream(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs},
        )

        for chunk in stream:
            if hasattr(chunk, "text") and chunk.text:
                yield chunk.text

__init__(client, model_name=None)

Parameters:

- client (Client): A google.genai.Client instance. Required.
- model_name (Optional[str]): The name of the model to use. Default: None.
Source code in outlines/models/gemini.py
def __init__(self, client: "Client", model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = GeminiTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate a response from the model.

Parameters:

- model_input (Union[Chat, list, str]): The prompt based on which the model will generate a response. Required.
- output_type (Optional[Any]): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type. Default: None.
- **inference_kwargs: Additional keyword arguments to pass to the client. Default: {}.

Returns:

- str: The response generated by the model.

Source code in outlines/models/gemini.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> str:
    """Generate a response from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    generation_config = self.type_adapter.format_output_type(output_type)

    completion = self.client.models.generate_content(
        **contents,
        model=inference_kwargs.pop("model", self.model_name),
        config={**generation_config, **inference_kwargs}
    )

    return completion.text

generate_stream(model_input, output_type=None, **inference_kwargs)

Generate a stream of responses from the model.

Parameters:

- model_input (Union[Chat, list, str]): The prompt based on which the model will generate a response. Required.
- output_type (Optional[Any]): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type. Default: None.
- **inference_kwargs: Additional keyword arguments to pass to the client. Default: {}.

Returns:

- Iterator[str]: An iterator that yields the text generated by the model.

Source code in outlines/models/gemini.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Generate a stream of responses from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    generation_config = self.type_adapter.format_output_type(output_type)

    stream = self.client.models.generate_content_stream(
        **contents,
        model=inference_kwargs.pop("model", self.model_name),
        config={**generation_config, **inference_kwargs},
    )

    for chunk in stream:
        if hasattr(chunk, "text") and chunk.text:
            yield chunk.text

GeminiTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Gemini model.

GeminiTypeAdapter is responsible for preparing the arguments to Gemini's client models.generate_content method: the input (prompt and possibly image), as well as the output type (either JSON or multiple choice).

Source code in outlines/models/gemini.py
class GeminiTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Gemini` model.

    `GeminiTypeAdapter` is responsible for preparing the arguments to Gemini's
    client `models.generate_content` method: the input (prompt and possibly
    image), as well as the output type (either JSON or multiple choice).

    """

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the `contents` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        dict
            The `contents` argument to pass to the client.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "Gemini. The only available types are `str`, `list` and `Chat` "
            "(containing a prompt and images)."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> dict:
        return {"contents": [self._create_text_part(model_input)]}

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> dict:
        return {
            "contents": [
                self._create_message("user", model_input)
            ]
        }

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> dict:
        """Generate the `contents` argument to pass to the client when the user
        passes a Chat instance.

        """
        return {
            "contents": [
                self._create_message(message["role"], message["content"])
                for message in model_input.messages
            ]
        }

    def _create_message(self, role: str, content: str | list) -> dict:
        """Create a message."""

        # Gemini uses "model" instead of "assistant"
        if role == "assistant":
            role = "model"

        if isinstance(content, str):
            return {
                "role": role,
                "parts": [self._create_text_part(content)],
            }

        elif isinstance(content, list):
            prompt = content[0]
            images = content[1:]

            if not all(isinstance(image, Image) for image in images):
                raise ValueError("All assets provided must be of type Image")

            image_parts = [
                self._create_img_part(image)
                for image in images
            ]

            return {
                "role": role,
                "parts": [
                    self._create_text_part(prompt),
                    *image_parts,
                ],
            }

        else:
            raise ValueError(
                f"Invalid content type: {type(content)}. "
                "The content must be a string or a list containing a string "
                "and a list of images."
            )

        return {"contents": [prompt, *image_parts]}


    def _create_text_part(self, text: str) -> dict:
        """Create a text input part for a message."""
        return {
            "text": text,
        }

    def _create_img_part(self, image: Image) -> dict:
        """Create an image input part for a message."""
        return {
            "inline_data": {
                "mime_type": image.image_format,
                "data": image.image_str,
            }
        }

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the `generation_config` argument to pass to the client.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        dict
            The `generation_config` argument to pass to the client.

        """

        # Unsupported output types
        if isinstance(output_type, Regex):
            raise TypeError(
                "Neither regex-based structured outputs nor the `pattern` "
                "keyword in Json Schema are available with Gemini. Use an "
                "open source model or dottxt instead."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not available with Gemini. "
                "Use an open source model or dottxt instead."
            )
        elif is_genson_schema_builder(output_type):
            raise TypeError(
                "The Gemini SDK does not accept Genson schema builders as an "
                "input. Pass a Pydantic model, typed dict or dataclass "
                "instead."
            )
        elif isinstance(output_type, JsonSchema):
            raise TypeError(
                "The Gemini SDK does not accept Json Schemas as an input. "
                "Pass a Pydantic model, typed dict or dataclass instead."
            )

        if output_type is None:
            return {}

        # Structured types
        elif is_dataclass(output_type):
            return self.format_json_output_type(output_type)
        elif is_typed_dict(output_type):
            return self.format_json_output_type(output_type)
        elif is_pydantic_model(output_type):
            return self.format_json_output_type(output_type)

        # List of structured types
        elif is_typing_list(output_type):
            return self.format_list_output_type(output_type)

        # Multiple choice types
        elif is_enum(output_type):
            return self.format_enum_output_type(output_type)
        elif is_literal(output_type):
            enum = get_enum_from_literal(output_type)
            return self.format_enum_output_type(enum)
        elif isinstance(output_type, Choice):
            enum = get_enum_from_choice(output_type)
            return self.format_enum_output_type(enum)

        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by Gemini. "
                "Consider using a local model or dottxt instead."
            )

    def format_enum_output_type(self, output_type: Optional[Any]) -> dict:
        return {
            "response_mime_type": "text/x.enum",
            "response_schema": output_type,
        }

    def format_json_output_type(self, output_type: Optional[Any]) -> dict:
        return {
            "response_mime_type": "application/json",
            "response_schema": output_type,
        }

    def format_list_output_type(self, output_type: Optional[Any]) -> dict:
        args = get_args(output_type)

        if len(args) == 1:
            item_type = args[0]

            # Check if list item type is supported
            if (
                is_pydantic_model(item_type)
                or is_typed_dict(item_type)
                or is_dataclass(item_type)
            ):
                return {
                    "response_mime_type": "application/json",
                    "response_schema": output_type,
                }

            else:
                raise TypeError(
                    "The only supported types for list items are Pydantic "
                    + "models, typed dicts and dataclasses."
                )

        raise TypeError(
            f"Gemini only supports homogeneous lists: "
            "list[BaseModel], list[TypedDict] or list[dataclass]. "
            f"Got {output_type} instead."
        )

format_chat_model_input(model_input)

Generate the contents argument to pass to the client when the user passes a Chat instance.

Source code in outlines/models/gemini.py
@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> dict:
    """Generate the `contents` argument to pass to the client when the user
    passes a Chat instance.

    """
    return {
        "contents": [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]
    }

format_input(model_input)

Generate the contents argument to pass to the client.

Parameters:

- model_input: The input provided by the user. Required.

Returns:

- dict: The contents argument to pass to the client.

Source code in outlines/models/gemini.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the `contents` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    dict
        The `contents` argument to pass to the client.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "Gemini. The only available types are `str`, `list` and `Chat` "
        "(containing a prompt and images)."
    )

format_output_type(output_type=None)

Generate the generation_config argument to pass to the client.

Parameters:

- output_type (Optional[Any]): The output type provided by the user. Default: None.

Returns:

- dict: The generation_config argument to pass to the client.

Source code in outlines/models/gemini.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the `generation_config` argument to pass to the client.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    dict
        The `generation_config` argument to pass to the client.

    """

    # Unsupported output types
    if isinstance(output_type, Regex):
        raise TypeError(
            "Neither regex-based structured outputs nor the `pattern` "
            "keyword in Json Schema are available with Gemini. Use an "
            "open source model or dottxt instead."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not available with Gemini. "
            "Use an open source model or dottxt instead."
        )
    elif is_genson_schema_builder(output_type):
        raise TypeError(
            "The Gemini SDK does not accept Genson schema builders as an "
            "input. Pass a Pydantic model, typed dict or dataclass "
            "instead."
        )
    elif isinstance(output_type, JsonSchema):
        raise TypeError(
            "The Gemini SDK does not accept Json Schemas as an input. "
            "Pass a Pydantic model, typed dict or dataclass instead."
        )

    if output_type is None:
        return {}

    # Structured types
    elif is_dataclass(output_type):
        return self.format_json_output_type(output_type)
    elif is_typed_dict(output_type):
        return self.format_json_output_type(output_type)
    elif is_pydantic_model(output_type):
        return self.format_json_output_type(output_type)

    # List of structured types
    elif is_typing_list(output_type):
        return self.format_list_output_type(output_type)

    # Multiple choice types
    elif is_enum(output_type):
        return self.format_enum_output_type(output_type)
    elif is_literal(output_type):
        enum = get_enum_from_literal(output_type)
        return self.format_enum_output_type(enum)
    elif isinstance(output_type, Choice):
        enum = get_enum_from_choice(output_type)
        return self.format_enum_output_type(enum)

    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by Gemini. "
            "Consider using a local model or dottxt instead."
        )
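
As an illustration of the mapping above, here is roughly what the adapter produces for a Pydantic model and for a multiple-choice Literal. This is a sketch; it assumes outlines, pydantic and any optional Gemini dependencies needed at import time are installed:

from typing import Literal

from pydantic import BaseModel

from outlines.models.gemini import GeminiTypeAdapter


class Character(BaseModel):
    name: str
    age: int


adapter = GeminiTypeAdapter()

# JSON-structured output: the schema is passed through as a Python type.
config = adapter.format_output_type(Character)
assert config == {
    "response_mime_type": "application/json",
    "response_schema": Character,
}

# Multiple choice: the Literal is converted to an Enum behind the scenes.
choice_config = adapter.format_output_type(Literal["yes", "no"])
assert choice_config["response_mime_type"] == "text/x.enum"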

from_gemini(client, model_name=None)

Create an Outlines Gemini model instance from a google.genai.Client instance.

Parameters:

- client (Client): A google.genai.Client instance. Required.
- model_name (Optional[str]): The name of the model to use. Default: None.

Returns:

- Gemini: An Outlines Gemini model instance.

Source code in outlines/models/gemini.py
def from_gemini(client: "Client", model_name: Optional[str] = None) -> Gemini:
    """Create an Outlines `Gemini` model instance from a
    `google.genai.Client` instance.

    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Gemini
        An Outlines `Gemini` model instance.

    """
    return Gemini(client, model_name)
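
A typical end-to-end call looks like the following sketch. It assumes the google-genai package is installed and that credentials are available in the environment; the model name is only an example:

from google import genai
from pydantic import BaseModel

from outlines.models.gemini import from_gemini


class Character(BaseModel):
    name: str
    age: int


client = genai.Client()  # reads API key / credentials from the environment
model = from_gemini(client, "gemini-1.5-flash")  # example model name

# Structured generation: the response text is JSON matching the schema.
json_text = model.generate("Create a character.", Character)

# Streaming plain-text generation.
for chunk in model.generate_stream("Tell me a one-sentence story."):
    print(chunk, end="")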

llamacpp

Integration with the llama-cpp-python library.

LlamaCpp

Bases: Model

Thin wrapper around the llama_cpp.Llama model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the llama_cpp.Llama model.

Source code in outlines/models/llamacpp.py
class LlamaCpp(Model):
    """Thin wrapper around the `llama_cpp.Llama` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `llama_cpp.Llama` model.
    """

    tensor_library_name = "numpy"

    def __init__(self, model: "Llama"):
        """
        Parameters
        ----------
        model
            A `llama_cpp.Llama` model instance.

        """
        self.model = model
        self.tokenizer = LlamaCppTokenizer(self.model)
        self.type_adapter = LlamaCppTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        if isinstance(output_type, CFGLogitsProcessor):
            raise NotImplementedError(
                "CFG generation is not supported for LlamaCpp due to bug in "
                "the llama_cpp tokenizer"
            )

        prompt = self.type_adapter.format_input(model_input)

        if isinstance(prompt, str):
            completion = self.model(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                **inference_kwargs,
            )
            result = completion["choices"][0]["text"]
        elif isinstance(prompt, list): # pragma: no cover
            completion = self.model.create_chat_completion(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                **inference_kwargs,
            )
            result = completion["choices"][0]["message"]["content"]

        self.model.reset()

        return result

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("LlamaCpp does not support batch generation.")

    def generate_stream(
        self,
        model_input: Union[Chat, str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if isinstance(output_type, CFGLogitsProcessor):
            raise NotImplementedError(
                "CFG generation is not supported for LlamaCpp due to bug in "
                "the llama_cpp tokenizer"
            )

        prompt = self.type_adapter.format_input(model_input)

        if isinstance(prompt, str):
            generator = self.model(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                stream=True,
                **inference_kwargs,
            )
            for chunk in generator:
                yield chunk["choices"][0]["text"]

        elif isinstance(prompt, list): # pragma: no cover
            generator = self.model.create_chat_completion(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                stream=True,
                **inference_kwargs,
            )
            for chunk in generator:
                yield chunk["choices"][0]["delta"].get("content", "")

__init__(model)

Parameters:

- model (Llama): A llama_cpp.Llama model instance. Required.
Source code in outlines/models/llamacpp.py
def __init__(self, model: "Llama"):
    """
    Parameters
    ----------
    model
        A `llama_cpp.Llama` model instance.

    """
    self.model = model
    self.tokenizer = LlamaCppTokenizer(self.model)
    self.type_adapter = LlamaCppTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using llama-cpp-python.

Parameters:

- model_input (Union[Chat, str]): The prompt based on which the model will generate a response. Required.
- output_type (Optional[OutlinesLogitsProcessor]): The logits processor the model will use to constrain the format of the generated text. Default: None.
- **inference_kwargs (Any): Additional keyword arguments to pass to the Llama.__call__ method of the llama-cpp-python library. Default: {}.

Returns:

- str: The text generated by the model.

Source code in outlines/models/llamacpp.py
def generate(
    self,
    model_input: Union[Chat, str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    if isinstance(output_type, CFGLogitsProcessor):
        raise NotImplementedError(
            "CFG generation is not supported for LlamaCpp due to bug in "
            "the llama_cpp tokenizer"
        )

    prompt = self.type_adapter.format_input(model_input)

    if isinstance(prompt, str):
        completion = self.model(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        result = completion["choices"][0]["text"]
    elif isinstance(prompt, list): # pragma: no cover
        completion = self.model.create_chat_completion(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        result = completion["choices"][0]["message"]["content"]

    self.model.reset()

    return result

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using llama-cpp-python.

Parameters:

- model_input (Union[Chat, str]): The prompt based on which the model will generate a response. Required.
- output_type (Optional[OutlinesLogitsProcessor]): The logits processor the model will use to constrain the format of the generated text. Default: None.
- **inference_kwargs (Any): Additional keyword arguments to pass to the Llama.__call__ method of the llama-cpp-python library. Default: {}.

Returns:

- Iterator[str]: An iterator that yields the text generated by the model.

Source code in outlines/models/llamacpp.py
def generate_stream(
    self,
    model_input: Union[Chat, str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if isinstance(output_type, CFGLogitsProcessor):
        raise NotImplementedError(
            "CFG generation is not supported for LlamaCpp due to bug in "
            "the llama_cpp tokenizer"
        )

    prompt = self.type_adapter.format_input(model_input)

    if isinstance(prompt, str):
        generator = self.model(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            stream=True,
            **inference_kwargs,
        )
        for chunk in generator:
            yield chunk["choices"][0]["text"]

    elif isinstance(prompt, list): # pragma: no cover
        generator = self.model.create_chat_completion(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            stream=True,
            **inference_kwargs,
        )
        for chunk in generator:
            yield chunk["choices"][0]["delta"].get("content", "")

LlamaCppTokenizer

Bases: Tokenizer

Source code in outlines/models/llamacpp.py
class LlamaCppTokenizer(Tokenizer):
    def __init__(self, model: "Llama"):
        self.eos_token_id = model.token_eos()
        self.eos_token = model.tokenizer().decode([self.eos_token_id])
        self.pad_token_id = self.eos_token_id
        self.special_tokens: Set[str] = set()

        self.vocabulary: Dict[str, int] = dict()

        self.tokenizer = model.tokenizer()

        # TODO: Remove when https://github.com/ggerganov/llama.cpp/pull/5613
        # is resolved
        self._hf_tokenizer = None
        try:
            self.vocabulary = model.tokenizer_.hf_tokenizer.get_vocab()
            self._hf_tokenizer = model.tokenizer_.hf_tokenizer
        except AttributeError:
            # Fall back to building the vocabulary by decoding each token id.
            for t in range(model.n_vocab()):
                token_piece = model.tokenizer().decode([t])
                self.vocabulary[token_piece] = t

        # ensure stable ordering of vocabulary
        self.vocabulary = {
            tok: tok_id
            for tok, tok_id
            in sorted(self.vocabulary.items(), key=lambda x: x[1])
        }

        self._hash = None

    def decode(self, token_ids: List[int]) -> List[str]:
        decoded_bytes = self.tokenizer.detokenize(token_ids)
        return [decoded_bytes.decode("utf-8", errors="ignore")]

    def encode(
        self,
        prompt: Union[str, List[str]],
        add_bos: bool = True,
        special: bool = True,
    ) -> Tuple[List[int], List[int]]:
        if isinstance(prompt, list):
            raise NotImplementedError(
                "llama-cpp-python tokenizer doesn't support batch tokenization"
            )
        token_ids = self.tokenizer.tokenize(
            prompt.encode("utf-8", errors="ignore"),
            add_bos=add_bos,
            special=special,
        )
        # generate attention mask, missing from llama-cpp-python
        attention_mask = [
            1 if token_id != self.pad_token_id else 0 for token_id in token_ids
        ]
        return token_ids, attention_mask

    def convert_token_to_string(self, token: str) -> str:
        if self._hf_tokenizer is not None:
            from transformers.file_utils import SPIECE_UNDERLINE

            token_str = self._hf_tokenizer.convert_tokens_to_string([token])
            if (
                token.startswith(SPIECE_UNDERLINE)
                or token == "<0x20>"
            ):  # pragma: no cover
                token_str = " " + token_str
            return token_str
        else:
            return token

    def __eq__(self, other):
        if not isinstance(other, LlamaCppTokenizer):
            return False
        return self.__getstate__() == other.__getstate__()

    def __hash__(self):
        # We create a custom hash as pickle.dumps(self) is not stable
        if self._hash is None:
            self._hash = hash((
                tuple(sorted(self.vocabulary.items())),
                self.eos_token_id,
                self.eos_token,
                self.pad_token_id,
                tuple(sorted(self.special_tokens)),
            ))
        return self._hash

    def __getstate__(self):
        """Create a stable representation for outlines.caching"""
        return (
            self.vocabulary,
            self.eos_token_id,
            self.eos_token,
            self.pad_token_id,
            sorted(self.special_tokens),
        )

    def __setstate__(self, state):
        raise NotImplementedError("Cannot load a pickled llamacpp tokenizer")

__getstate__()

Create a stable representation for outlines.caching

Source code in outlines/models/llamacpp.py
def __getstate__(self):
    """Create a stable representation for outlines.caching"""
    return (
        self.vocabulary,
        self.eos_token_id,
        self.eos_token,
        self.pad_token_id,
        sorted(self.special_tokens),
    )

LlamaCppTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the LlamaCpp model.

LlamaCppTypeAdapter is responsible for preparing the arguments to the Llama object text generation methods.

Source code in outlines/models/llamacpp.py
class LlamaCppTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `LlamaCpp` model.

    `LlamaCppTypeAdapter` is responsible for preparing the arguments to
    the `Llama` object text generation methods.

    """

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {type(model_input)} is not available with "
            "LlamaCpp. The only available types are `str` and `Chat`."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        return model_input

    @format_input.register(Chat)
    def format_chat_input(self, model_input: Chat) -> list:
        if not all(
            isinstance(message["content"], str)
            for message in model_input.messages
        ):
            raise ValueError(
                "LlamaCpp does not support multi-modal messages."
                + "The content of each message must be a string."
            )

        return  [
            {
                "role": message["role"],
                "content": message["content"],
            }
            for message in model_input.messages
        ]

    def format_output_type(
        self, output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> "LogitsProcessorList":
        """Generate the logits processor argument to pass to the model.

        Parameters
        ----------
        output_type
            The logits processor provided.

        Returns
        -------
        LogitsProcessorList
            The logits processor to pass to the model.

        """
        from llama_cpp import LogitsProcessorList

        return LogitsProcessorList([output_type])

format_input(model_input)

Generate the prompt argument to pass to the model.

Parameters:

- model_input: The input provided by the user. Required.

Returns:

- str: The formatted input to be passed to the model.

Source code in outlines/models/llamacpp.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {type(model_input)} is not available with "
        "LlamaCpp. The only available types are `str` and `Chat`."
    )

format_output_type(output_type=None)

Generate the logits processor argument to pass to the model.

Parameters:

- output_type (Optional[OutlinesLogitsProcessor]): The logits processor provided. Default: None.

Returns:

- LogitsProcessorList: The logits processor to pass to the model.

Source code in outlines/models/llamacpp.py
def format_output_type(
    self, output_type: Optional[OutlinesLogitsProcessor] = None,
) -> "LogitsProcessorList":
    """Generate the logits processor argument to pass to the model.

    Parameters
    ----------
    output_type
        The logits processor provided.

    Returns
    -------
    LogitsProcessorList
        The logits processor to pass to the model.

    """
    from llama_cpp import LogitsProcessorList

    return LogitsProcessorList([output_type])

from_llamacpp(model)

Create an Outlines LlamaCpp model instance from a llama_cpp.Llama instance.

Parameters:

- model (Llama): A llama_cpp.Llama instance. Required.

Returns:

- LlamaCpp: An Outlines LlamaCpp model instance.

Source code in outlines/models/llamacpp.py
def from_llamacpp(model: "Llama"):
    """Create an Outlines `LlamaCpp` model instance from a
    `llama_cpp.Llama` instance.

    Parameters
    ----------
    model
        A `llama_cpp.Llama` instance.

    Returns
    -------
    LlamaCpp
        An Outlines `LlamaCpp` model instance.

    """
    return LlamaCpp(model)
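
A typical local workflow loads a GGUF file with llama-cpp-python and wraps it with from_llamacpp; extra keyword arguments such as max_tokens are forwarded to Llama.__call__. A sketch, with a placeholder model path:

from llama_cpp import Llama

from outlines.models.llamacpp import from_llamacpp

llm = Llama(model_path="path/to/model.gguf")  # placeholder path to a local GGUF file
model = from_llamacpp(llm)

# Plain generation; `max_tokens` is passed through to `Llama.__call__`.
text = model.generate("Write a haiku about autumn.", max_tokens=64)

# Token-by-token streaming.
for chunk in model.generate_stream("Write a haiku about winter.", max_tokens=64):
    print(chunk, end="")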

mlxlm

Integration with the mlx_lm library.

MLXLM

Bases: Model

Thin wrapper around an mlx_lm model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the mlx_lm library.

Source code in outlines/models/mlxlm.py
class MLXLM(Model):
    """Thin wrapper around an `mlx_lm` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `mlx_lm` library.

    """

    tensor_library_name = "mlx"

    def __init__(
        self,
        model: "nn.Module",
        tokenizer: "PreTrainedTokenizer",
    ):
        """
        Parameters
        ----------
        model
            An instance of an `mlx_lm` model.
        tokenizer
            An instance of an `mlx_lm` tokenizer or of a compatible
            `transformers` tokenizer.

        """
        self.model = model
        # self.mlx_tokenizer is used by the mlx-lm in its generate function
        self.mlx_tokenizer = tokenizer
        # self.tokenizer is used by the logits processor
        self.tokenizer = TransformerTokenizer(tokenizer._tokenizer)
        self.type_adapter = MLXLMTypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> str:
        """Generate text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        from mlx_lm import generate

        return generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `mlx_lm` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> Iterator[str]:
        """Stream text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        from mlx_lm import stream_generate

        for gen_response in stream_generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        ):
            yield gen_response.text

__init__(model, tokenizer)

Parameters:

- model (Module): An instance of an mlx_lm model. Required.
- tokenizer (PreTrainedTokenizer): An instance of an mlx_lm tokenizer or of a compatible transformers tokenizer. Required.
Source code in outlines/models/mlxlm.py
def __init__(
    self,
    model: "nn.Module",
    tokenizer: "PreTrainedTokenizer",
):
    """
    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        `transformers` tokenizer.

    """
    self.model = model
    # self.mlx_tokenizer is used by the mlx-lm in its generate function
    self.mlx_tokenizer = tokenizer
    # self.tokenizer is used by the logits processor
    self.tokenizer = TransformerTokenizer(tokenizer._tokenizer)
    self.type_adapter = MLXLMTypeAdapter()

generate(model_input, output_type=None, **kwargs)

Generate text using mlx-lm.

Parameters:

- model_input (str): The prompt based on which the model will generate a response. Required.
- output_type (Optional[OutlinesLogitsProcessor]): The logits processor the model will use to constrain the format of the generated text. Default: None.
- kwargs: Additional keyword arguments to pass to the mlx-lm library. Default: {}.

Returns:

- str: The text generated by the model.

Source code in outlines/models/mlxlm.py
def generate(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> str:
    """Generate text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    from mlx_lm import generate

    return generate(
        self.model,
        self.mlx_tokenizer,
        self.type_adapter.format_input(model_input),
        logits_processors=self.type_adapter.format_output_type(output_type),
        **kwargs,
    )

generate_stream(model_input, output_type=None, **kwargs)

Stream text using mlx-lm.

Parameters:

- model_input (str): The prompt based on which the model will generate a response. Required.
- output_type (Optional[OutlinesLogitsProcessor]): The logits processor the model will use to constrain the format of the generated text. Default: None.
- kwargs: Additional keyword arguments to pass to the mlx-lm library. Default: {}.

Returns:

- Iterator[str]: An iterator that yields the text generated by the model.

Source code in outlines/models/mlxlm.py
def generate_stream(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> Iterator[str]:
    """Stream text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    from mlx_lm import stream_generate

    for gen_response in stream_generate(
        self.model,
        self.mlx_tokenizer,
        self.type_adapter.format_input(model_input),
        logits_processors=self.type_adapter.format_output_type(output_type),
        **kwargs,
    ):
        yield gen_response.text

MLXLMTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the MLXLM model.

Source code in outlines/models/mlxlm.py
class MLXLMTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `MLXLM` model."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {input} is not available with mlx-lm. "
            "The only available type is `str`."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str):
        return model_input

    def format_output_type(
        self, output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional[List[OutlinesLogitsProcessor]]:
        """Generate the logits processor argument to pass to the model.

        Parameters
        ----------
        output_type
            The logits processor provided.

        Returns
        -------
        Optional[list[OutlinesLogitsProcessor]]
            The logits processor argument to be passed to the model.

        """
        if not output_type:
            return None
        return [output_type]

format_input(model_input)

Generate the prompt argument to pass to the model.

Parameters:

- model_input (required): The input provided by the user.

Returns:

- str: The formatted input to be passed to the model.

Source code in outlines/models/mlxlm.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {input} is not available with mlx-lm. "
        "The only available type is `str`."
    )

format_output_type(output_type=None)

Generate the logits processor argument to pass to the model.

Parameters:

- output_type (Optional[OutlinesLogitsProcessor], default None): The logits processor provided.

Returns:

- Optional[list[OutlinesLogitsProcessor]]: The logits processor argument to be passed to the model.

Source code in outlines/models/mlxlm.py
def format_output_type(
    self, output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional[List[OutlinesLogitsProcessor]]:
    """Generate the logits processor argument to pass to the model.

    Parameters
    ----------
    output_type
        The logits processor provided.

    Returns
    -------
    Optional[list[OutlinesLogitsProcessor]]
        The logits processor argument to be passed to the model.

    """
    if not output_type:
        return None
    return [output_type]

from_mlxlm(model, tokenizer)

Create an Outlines MLXLM model instance from an mlx_lm model and a tokenizer.

Parameters:

- model (Module, required): An instance of an mlx_lm model.
- tokenizer (PreTrainedTokenizer, required): An instance of an mlx_lm tokenizer or of a compatible transformers tokenizer.

Returns:

- MLXLM: An Outlines MLXLM model instance.

Source code in outlines/models/mlxlm.py
def from_mlxlm(model: "nn.Module", tokenizer: "PreTrainedTokenizer") -> MLXLM:
    """Create an Outlines `MLXLM` model instance from an `mlx_lm` model and a
    tokenizer.

    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        transformers tokenizer.

    Returns
    -------
    MLXLM
        An Outlines `MLXLM` model instance.

    """
    return MLXLM(model, tokenizer)
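
For orientation, here is a minimal usage sketch. It assumes the mlx-lm package is installed, that from_mlxlm is re-exported at the Outlines package root like the other loader functions, and that the model repository name is only a placeholder:

import mlx_lm
import outlines

# `mlx_lm.load` returns a (model, tokenizer) pair, which is exactly what
# `from_mlxlm` expects. The repository name below is a placeholder.
mlx_model, mlx_tokenizer = mlx_lm.load("mlx-community/SmolLM-135M-Instruct-4bit")
model = outlines.from_mlxlm(mlx_model, mlx_tokenizer)

# Unconstrained generation: `output_type` stays None and extra keyword
# arguments such as `max_tokens` are forwarded to `mlx_lm.generate`.
print(model.generate("Write a haiku about the sea.", max_tokens=64))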

ollama

Integration with the ollama library.

AsyncOllama

Bases: AsyncModel

Thin wrapper around the ollama.AsyncClient client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.AsyncClient client.

Source code in outlines/models/ollama.py
class AsyncOllama(AsyncModel):
    """Thin wrapper around the `ollama.AsyncClient` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `ollama.AsyncClient` client.

    """

    def __init__(
        self, client: "AsyncClient", model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `ollama.AsyncClient` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OllamaTypeAdapter()

    async def generate(self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        response = await self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )
        return response.message.content

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `ollama` library does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        stream = await self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            stream=True,
            **kwargs,
        )
        async for chunk in stream:
            yield chunk.message.content

__init__(client, model_name=None)

Parameters:

- client (AsyncClient, required): The ollama.AsyncClient client.
- model_name (Optional[str], default None): The name of the model to use.
Source code in outlines/models/ollama.py
def __init__(
    self, client: "AsyncClient", model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `ollama.AsyncClient` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OllamaTypeAdapter()

generate(model_input, output_type=None, **kwargs) async

Generate text using Ollama.

Parameters:

- model_input (Chat | str | list, required): The prompt based on which the model will generate a response.
- output_type (Optional[Any], default None): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.
- **kwargs (Any, default {}): Additional keyword arguments to pass to the client.

Returns:

- str: The text generated by the model.

Source code in outlines/models/ollama.py
async def generate(self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    response = await self.client.chat(
        messages=self.type_adapter.format_input(model_input),
        format=self.type_adapter.format_output_type(output_type),
        **kwargs,
    )
    return response.message.content

generate_stream(model_input, output_type=None, **kwargs) async

Stream text using Ollama.

Parameters:

- model_input (Chat | str | list, required): The prompt based on which the model will generate a response.
- output_type (Optional[Any], default None): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.
- **kwargs (Any, default {}): Additional keyword arguments to pass to the client.

Returns:

- AsyncIterator[str]: An async iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py
async def generate_stream( # type: ignore
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    stream = await self.client.chat(
        messages=self.type_adapter.format_input(model_input),
        format=self.type_adapter.format_output_type(output_type),
        stream=True,
        **kwargs,
    )
    async for chunk in stream:
        yield chunk.message.content
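
As a rough usage sketch, the snippet below streams tokens from a locally running Ollama server through AsyncOllama; a running server is assumed and the model tag is a placeholder that must already be pulled:

import asyncio

from ollama import AsyncClient

from outlines.models.ollama import AsyncOllama


async def main():
    # The "tinyllama" tag is a placeholder for any locally pulled model.
    model = AsyncOllama(AsyncClient(), model_name="tinyllama")
    # `generate_stream` is an async generator, so it is consumed with `async for`.
    async for chunk in model.generate_stream("Tell me a one-line joke."):
        print(chunk, end="", flush=True)


asyncio.run(main())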

Ollama

Bases: Model

Thin wrapper around the ollama.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.Client client.

Source code in outlines/models/ollama.py
class Ollama(Model):
    """Thin wrapper around the `ollama.Client` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `ollama.Client` client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            The `ollama.Client` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OllamaTypeAdapter()

    def generate(self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        response = self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )
        return response.message.content

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `ollama` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        response = self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            stream=True,
            **kwargs,
        )
        for chunk in response:
            yield chunk.message.content

__init__(client, model_name=None)

Parameters:

- client (Client, required): The ollama.Client client.
- model_name (Optional[str], default None): The name of the model to use.
Source code in outlines/models/ollama.py
def __init__(self, client: "Client", model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        The `ollama.Client` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OllamaTypeAdapter()

generate(model_input, output_type=None, **kwargs)

Generate text using Ollama.

Parameters:

- model_input (Chat | str | list, required): The prompt based on which the model will generate a response.
- output_type (Optional[Any], default None): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.
- **kwargs (Any, default {}): Additional keyword arguments to pass to the client.

Returns:

- str: The text generated by the model.

Source code in outlines/models/ollama.py
def generate(self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    response = self.client.chat(
        messages=self.type_adapter.format_input(model_input),
        format=self.type_adapter.format_output_type(output_type),
        **kwargs,
    )
    return response.message.content

generate_stream(model_input, output_type=None, **kwargs)

Stream text using Ollama.

Parameters:

- model_input (Chat | str | list, required): The prompt based on which the model will generate a response.
- output_type (Optional[Any], default None): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.
- **kwargs (Any, default {}): Additional keyword arguments to pass to the client.

Returns:

- Iterator[str]: An iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py
def generate_stream(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> Iterator[str]:
    """Stream text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    response = self.client.chat(
        messages=self.type_adapter.format_input(model_input),
        format=self.type_adapter.format_output_type(output_type),
        stream=True,
        **kwargs,
    )
    for chunk in response:
        yield chunk.message.content

OllamaTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Ollama model.

Source code in outlines/models/ollama.py
class OllamaTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Ollama` model."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the value of the `messages` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        list
            The formatted value of the `messages` argument to be passed to
            the client.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "Ollama. The only available types are `str`, `list` and `Chat`."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user only passes a prompt.

        """
        return [
            self._create_message("user", model_input)
        ]

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user passes a prompt and images.

        """
        return [
            self._create_message("user", model_input)
        ]

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user passes a Chat instance.

        """
        return [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]

    def _create_message(self, role: str, content: str | list) -> dict:
        """Create a message."""

        if isinstance(content, str):
            return {
                "role": role,
                "content": content,
            }

        elif isinstance(content, list):
            prompt = content[0]
            images = content[1:]

            if not all(isinstance(image, Image) for image in images):
                raise ValueError("All assets provided must be of type Image")

            return {
                "role": role,
                "content": prompt,
                "image": [image.image_str for image in images],
            }

        else:
            raise ValueError(
                f"Invalid content type: {type(content)}. "
                "The content must be a string or a list containing a string "
                "and a list of images."
            )

    def format_output_type(
        self, output_type: Optional[Any] = None
    ) -> Optional[str]:
        """Format the output type to pass to the client.

        TODO: `int`, `float` and other Python types could be supported via
        JSON Schema.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        Optional[str]
            The formatted output type to be passed to the model.

        """
        if isinstance(output_type, Regex):
            raise TypeError(
                "Regex-based structured outputs are not supported by Ollama. "
                "Use an open source model in the meantime."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not supported by Ollama. "
                "Use an open source model in the meantime."
            )

        if output_type is None:
            return None
        elif isinstance(output_type, JsonSchema):
            return json.loads(output_type.schema)
        elif is_dataclass(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return schema
        elif is_typed_dict(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return schema
        elif is_pydantic_model(output_type):
            schema = output_type.model_json_schema()
            return schema
        elif is_genson_schema_builder(output_type):
            return output_type.to_json()
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by Ollama. "
                "Consider using a local model instead."
            )

format_chat_model_input(model_input)

Generate the value of the messages argument to pass to the client when the user passes a Chat instance.

Source code in outlines/models/ollama.py
@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a Chat instance.

    """
    return [
        self._create_message(message["role"], message["content"])
        for message in model_input.messages
    ]

format_input(model_input)

Generate the value of the messages argument to pass to the client.

Parameters:

- model_input (required): The input provided by the user.

Returns:

- list: The formatted value of the messages argument to be passed to the client.

Source code in outlines/models/ollama.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the value of the `messages` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    list
        The formatted value of the `messages` argument to be passed to
        the client.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "Ollama. The only available types are `str`, `list` and `Chat`."
    )

format_list_model_input(model_input)

Generate the value of the messages argument to pass to the client when the user passes a prompt and images.

Source code in outlines/models/ollama.py
@format_input.register(list)
def format_list_model_input(self, model_input: list) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a prompt and images.

    """
    return [
        self._create_message("user", model_input)
    ]

format_output_type(output_type=None)

Format the output type to pass to the client.

TODO: int, float and other Python types could be supported via JSON Schema.

Parameters:

- output_type (Optional[Any], default None): The output type provided by the user.

Returns:

- Optional[str]: The formatted output type to be passed to the model.

Source code in outlines/models/ollama.py
def format_output_type(
    self, output_type: Optional[Any] = None
) -> Optional[str]:
    """Format the output type to pass to the client.

    TODO: `int`, `float` and other Python types could be supported via
    JSON Schema.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    Optional[str]
        The formatted output type to be passed to the model.

    """
    if isinstance(output_type, Regex):
        raise TypeError(
            "Regex-based structured outputs are not supported by Ollama. "
            "Use an open source model in the meantime."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not supported by Ollama. "
            "Use an open source model in the meantime."
        )

    if output_type is None:
        return None
    elif isinstance(output_type, JsonSchema):
        return json.loads(output_type.schema)
    elif is_dataclass(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return schema
    elif is_typed_dict(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return schema
    elif is_pydantic_model(output_type):
        schema = output_type.model_json_schema()
        return schema
    elif is_genson_schema_builder(output_type):
        return output_type.to_json()
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by Ollama. "
            "Consider using a local model instead."
        )
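
To make the shape of the resulting format argument concrete, here is an illustrative sketch with a made-up Pydantic model; the adapter returns the model's JSON schema as a plain dictionary:

from pydantic import BaseModel

from outlines.models.ollama import OllamaTypeAdapter


class Person(BaseModel):  # made-up example model
    name: str
    age: int


adapter = OllamaTypeAdapter()
schema = adapter.format_output_type(Person)
# `schema` is the dict produced by `Person.model_json_schema()`; its
# "properties" entry lists "name" and "age".
print(sorted(schema["properties"]))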

format_str_model_input(model_input)

Generate the value of the messages argument to pass to the client when the user only passes a prompt.

Source code in outlines/models/ollama.py
@format_input.register(str)
def format_str_model_input(self, model_input: str) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user only passes a prompt.

    """
    return [
        self._create_message("user", model_input)
    ]

from_ollama(client, model_name=None)

Create an Outlines Ollama model instance from an ollama.Client or ollama.AsyncClient instance.

Parameters:

- client (Union[Client, AsyncClient], required): An ollama.Client or ollama.AsyncClient instance.
- model_name (Optional[str], default None): The name of the model to use.

Returns:

- Union[Ollama, AsyncOllama]: An Outlines Ollama or AsyncOllama model instance.

Source code in outlines/models/ollama.py
def from_ollama(
    client: Union["Client", "AsyncClient"], model_name: Optional[str] = None
) -> Union[Ollama, AsyncOllama]:
    """Create an Outlines `Ollama` model instance from an `ollama.Client`
    or `ollama.AsyncClient` instance.

    Parameters
    ----------
    client
        An `ollama.Client` or `ollama.AsyncClient` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[Ollama, AsyncOllama]
        An Outlines `Ollama` or `AsyncOllama` model instance.

    """
    from ollama import AsyncClient, Client

    if isinstance(client, Client):
        return Ollama(client, model_name)
    elif isinstance(client, AsyncClient):
        return AsyncOllama(client, model_name)
    else:
        raise ValueError(
            "Invalid client type, the client must be an instance of "
            "`ollama.Client` or `ollama.AsyncClient`."
        )
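
A minimal end-to-end sketch follows. It assumes a local Ollama server, an already-pulled model tag (placeholder below), and that from_ollama is re-exported at the Outlines package root like the other loader functions:

from ollama import Client
from pydantic import BaseModel

import outlines


class City(BaseModel):  # made-up example schema
    name: str
    country: str


model = outlines.from_ollama(Client(), "tinyllama")
# The Pydantic model is converted to a JSON schema and passed as Ollama's
# `format` argument; the raw response is a JSON string.
raw = model.generate("Give me one city as JSON.", output_type=City)
print(City.model_validate_json(raw))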

openai

Integration with OpenAI's API.

OpenAI

Bases: Model

Thin wrapper around the openai.OpenAI client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client.

Source code in outlines/models/openai.py
class OpenAI(Model):
    """Thin wrapper around the `openai.OpenAI` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client.

    """

    def __init__(
        self,
        client: Union["OpenAIClient", "AzureOpenAIClient"],
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `openai.OpenAI` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OpenAITypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            result = self.client.chat.completions.create(
                messages=messages,
                **response_format,
                **inference_kwargs,
            )
        except openai.BadRequestError as e:
            if e.body["message"].startswith("Invalid schema"):
                raise TypeError(
                    f"OpenAI does not support your schema: {e.body['message']}. "
                    "Try a local model or dottxt instead."
                )
            else:
                raise e

        messages = [choice.message for choice in result.choices]
        for message in messages:
            if message.refusal is not None:
                raise ValueError(
                    f"OpenAI refused to answer the request: {message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The `openai` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Stream text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        stream = self.client.chat.completions.create(
            stream=True,
            messages=messages,
            **response_format,
            **inference_kwargs
        )

        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

__init__(client, model_name=None)

Parameters:

- client (Union[OpenAI, AzureOpenAI], required): The openai.OpenAI client.
- model_name (Optional[str], default None): The name of the model to use.
Source code in outlines/models/openai.py
def __init__(
    self,
    client: Union["OpenAIClient", "AzureOpenAIClient"],
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `openai.OpenAI` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OpenAITypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using OpenAI.

Parameters:

- model_input (Union[Chat, list, str], required): The prompt based on which the model will generate a response.
- output_type (Optional[Union[type[BaseModel], str]], default None): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.
- **inference_kwargs (Any, default {}): Additional keyword arguments to pass to the client.

Returns:

- Union[str, list[str]]: The text generated by the model.

Source code in outlines/models/openai.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        result = self.client.chat.completions.create(
            messages=messages,
            **response_format,
            **inference_kwargs,
        )
    except openai.BadRequestError as e:
        if e.body["message"].startswith("Invalid schema"):
            raise TypeError(
                f"OpenAI does not support your schema: {e.body['message']}. "
                "Try a local model or dottxt instead."
            )
        else:
            raise e

    messages = [choice.message for choice in result.choices]
    for message in messages:
        if message.refusal is not None:
            raise ValueError(
                f"OpenAI refused to answer the request: {message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using OpenAI.

Parameters:

- model_input (Union[Chat, list, str], required): The prompt based on which the model will generate a response.
- output_type (Optional[Union[type[BaseModel], str]], default None): The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.
- **inference_kwargs (default {}): Additional keyword arguments to pass to the client.

Returns:

- Iterator[str]: An iterator that yields the text generated by the model.

Source code in outlines/models/openai.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Stream text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    stream = self.client.chat.completions.create(
        stream=True,
        messages=messages,
        **response_format,
        **inference_kwargs
    )

    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content
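
A short streaming sketch, assuming OPENAI_API_KEY is set in the environment and using a placeholder model name:

import openai

from outlines.models.openai import OpenAI

# The model name is only an example; any chat-completions model works.
model = OpenAI(openai.OpenAI(), model_name="gpt-4o-mini")
for chunk in model.generate_stream("Write one sentence about autumn."):
    print(chunk, end="", flush=True)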

OpenAITypeAdapter

Bases: ModelTypeAdapter

Type adapter for the OpenAI model.

OpenAITypeAdapter is responsible for preparing the arguments to OpenAI's completions.create methods: the input (prompt and possibly image), as well as the output type (only JSON).

Source code in outlines/models/openai.py
class OpenAITypeAdapter(ModelTypeAdapter):
    """Type adapter for the `OpenAI` model.

    `OpenAITypeAdapter` is responsible for preparing the arguments to OpenAI's
    `completions.create` methods: the input (prompt and possibly image), as
    well as the output type (only JSON).

    """

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the `messages` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        dict
            The formatted input to be passed to the client.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "OpenAI. The only available types are `str`, `list` and `Chat`."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user only passes a prompt.

        """
        return [
            self._create_message("user", model_input)
        ]

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user passes a prompt and images.

        """
        return [
            self._create_message("user", model_input)
        ]

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user passes a Chat instance.

        """
        return [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]

    def _create_message(self, role: str, content: str | list) -> dict:
        """Create a message."""

        if isinstance(content, str):
            return {
                "role": role,
                "content": content,
            }

        elif isinstance(content, list):
            prompt = content[0]
            images = content[1:]

            if not all(isinstance(image, Image) for image in images):
                raise ValueError("All assets provided must be of type Image")

            image_parts = [
                self._create_img_content(image)
                for image in images
            ]

            return {
                "role": role,
                "content": [
                    {"type": "text", "text": prompt},
                    *image_parts,
                ],
            }

        else:
            raise ValueError(
                f"Invalid content type: {type(content)}. "
                "The content must be a string or a list containing a string "
                "and a list of images."
            )

    def _create_img_content(self, image: Image) -> dict:
        """Create the content for an image input."""
        return {
            "type": "image_url",
            "image_url": {
                "url": f"data:{image.image_format};base64,{image.image_str}"  # noqa: E702
            },
        }

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the `response_format` argument to the client based on the
        output type specified by the user.

        TODO: `int`, `float` and other Python types could be supported via
        JSON Schema.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        dict
            The formatted output type to be passed to the client.

        """
        # Unsupported languages
        if isinstance(output_type, Regex):
            raise TypeError(
                "Neither regex-based structured outputs nor the `pattern` keyword "
                "in Json Schema are available with OpenAI. Use an open source "
                "model or dottxt instead."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not available with OpenAI. "
                "Use an open source model or dottxt instead."
            )

        if output_type is None:
            return {}
        elif is_native_dict(output_type):
            return self.format_json_mode_type()
        elif is_dataclass(output_type):
            output_type = TypeAdapter(output_type).json_schema()
            return self.format_json_output_type(output_type)
        elif is_typed_dict(output_type):
            output_type = TypeAdapter(output_type).json_schema()
            return self.format_json_output_type(output_type)
        elif is_pydantic_model(output_type):
            output_type = output_type.model_json_schema()
            return self.format_json_output_type(output_type)
        elif is_genson_schema_builder(output_type):
            schema = json.loads(output_type.to_json())
            return self.format_json_output_type(schema)
        elif isinstance(output_type, JsonSchema):
            return self.format_json_output_type(json.loads(output_type.schema))
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not available with OpenAI. "
                "Use an open source model or dottxt instead."
            )

    def format_json_output_type(self, schema: dict) -> dict:
        """Generate the `response_format` argument to the client when the user
        specified a `Json` output type.

        """
        # OpenAI requires `additionalProperties` to be set to False
        schema = set_additional_properties_false_json_schema(schema)

        return {
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "default",
                    "strict": True,
                    "schema": schema,
                },
            }
        }

    def format_json_mode_type(self) -> dict:
        """Generate the `response_format` argument to the client when the user
        specified the output type should be a JSON but without specifying the
        schema (also called "JSON mode").

        """
        return {"response_format": {"type": "json_object"}}

format_chat_model_input(model_input)

Generate the value of the messages argument to pass to the client when the user passes a Chat instance.

Source code in outlines/models/openai.py
@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a Chat instance.

    """
    return [
        self._create_message(message["role"], message["content"])
        for message in model_input.messages
    ]

format_input(model_input)

Generate the messages argument to pass to the client.

Parameters:

- model_input (required): The input provided by the user.

Returns:

- dict: The formatted input to be passed to the client.

Source code in outlines/models/openai.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the `messages` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    dict
        The formatted input to be passed to the client.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "OpenAI. The only available types are `str`, `list` and `Chat`."
    )

format_json_mode_type()

Generate the response_format argument to the client when the user specified the output type should be a JSON but without specifying the schema (also called "JSON mode").

Source code in outlines/models/openai.py
def format_json_mode_type(self) -> dict:
    """Generate the `response_format` argument to the client when the user
    specified the output type should be a JSON but without specifying the
    schema (also called "JSON mode").

    """
    return {"response_format": {"type": "json_object"}}

format_json_output_type(schema)

Generate the response_format argument to the client when the user specified a Json output type.

Source code in outlines/models/openai.py
def format_json_output_type(self, schema: dict) -> dict:
    """Generate the `response_format` argument to the client when the user
    specified a `Json` output type.

    """
    # OpenAI requires `additionalProperties` to be set to False
    schema = set_additional_properties_false_json_schema(schema)

    return {
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "name": "default",
                "strict": True,
                "schema": schema,
            },
        }
    }
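
For reference, a sketch of the payload this method produces for a small, made-up schema:

from outlines.models.openai import OpenAITypeAdapter

adapter = OpenAITypeAdapter()
payload = adapter.format_json_output_type(
    {"type": "object", "properties": {"name": {"type": "string"}}}
)
# The schema is wrapped for strict structured outputs, with
# `additionalProperties` forced to False:
# {"response_format": {"type": "json_schema",
#                      "json_schema": {"name": "default", "strict": True,
#                                      "schema": {...}}}}
print(payload["response_format"]["json_schema"]["strict"])  # True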

format_list_model_input(model_input)

Generate the value of the messages argument to pass to the client when the user passes a prompt and images.

Source code in outlines/models/openai.py
@format_input.register(list)
def format_list_model_input(self, model_input: list) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a prompt and images.

    """
    return [
        self._create_message("user", model_input)
    ]

format_output_type(output_type=None)

Generate the response_format argument to the client based on the output type specified by the user.

TODO: int, float and other Python types could be supported via JSON Schema.

Parameters:

- output_type (Optional[Any], default None): The output type provided by the user.

Returns:

- dict: The formatted output type to be passed to the client.

Source code in outlines/models/openai.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the `response_format` argument to the client based on the
    output type specified by the user.

    TODO: `int`, `float` and other Python types could be supported via
    JSON Schema.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    dict
        The formatted output type to be passed to the client.

    """
    # Unsupported languages
    if isinstance(output_type, Regex):
        raise TypeError(
            "Neither regex-based structured outputs nor the `pattern` keyword "
            "in Json Schema are available with OpenAI. Use an open source "
            "model or dottxt instead."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not available with OpenAI. "
            "Use an open source model or dottxt instead."
        )

    if output_type is None:
        return {}
    elif is_native_dict(output_type):
        return self.format_json_mode_type()
    elif is_dataclass(output_type):
        output_type = TypeAdapter(output_type).json_schema()
        return self.format_json_output_type(output_type)
    elif is_typed_dict(output_type):
        output_type = TypeAdapter(output_type).json_schema()
        return self.format_json_output_type(output_type)
    elif is_pydantic_model(output_type):
        output_type = output_type.model_json_schema()
        return self.format_json_output_type(output_type)
    elif is_genson_schema_builder(output_type):
        schema = json.loads(output_type.to_json())
        return self.format_json_output_type(schema)
    elif isinstance(output_type, JsonSchema):
        return self.format_json_output_type(json.loads(output_type.schema))
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not available with OpenAI. "
            "Use an open source model or dottxt instead."
        )

format_str_model_input(model_input)

Generate the value of the messages argument to pass to the client when the user only passes a prompt.

Source code in outlines/models/openai.py
@format_input.register(str)
def format_str_model_input(self, model_input: str) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user only passes a prompt.

    """
    return [
        self._create_message("user", model_input)
    ]

from_openai(client, model_name=None)

Create an Outlines OpenAI model instance from an openai.OpenAI client.

Parameters:

- client (Union[OpenAI, AzureOpenAI], required): An openai.OpenAI client instance.
- model_name (Optional[str], default None): The name of the model to use.

Returns:

- OpenAI: An Outlines OpenAI model instance.

Source code in outlines/models/openai.py
def from_openai(
    client: Union["OpenAIClient", "AzureOpenAIClient"],
    model_name: Optional[str] = None,
) -> OpenAI:
    """Create an Outlines `OpenAI` model instance from an `openai.OpenAI`
    client.

    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    OpenAI
        An Outlines `OpenAI` model instance.

    """
    return OpenAI(client, model_name)
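
A minimal structured-output sketch, assuming OPENAI_API_KEY is set, that from_openai is re-exported at the Outlines package root, and using a placeholder model name and made-up schema:

import openai
from pydantic import BaseModel

import outlines


class Answer(BaseModel):  # made-up example schema
    value: int
    reasoning: str


model = outlines.from_openai(openai.OpenAI(), "gpt-4o-mini")
# The Pydantic model becomes a strict `response_format` JSON schema.
raw = model.generate("What is 6 * 7? Answer in JSON.", output_type=Answer)
print(Answer.model_validate_json(raw))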

sglang

Integration with an SGLang server.

AsyncSGLang

Bases: AsyncModel

Thin async wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py
class AsyncSGLang(AsyncModel):
    """Thin async wrapper around the `openai.OpenAI` client used to communicate
    with an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using `sglang`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        response = await self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The sglang server refused to answer the request: "
                    f"{message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "SGLang does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Return a text generator.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )

        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }

        return client_args
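
A rough usage sketch follows; the server URL, API key, and model name are placeholders for a locally running SGLang instance exposed through its OpenAI-compatible endpoint:

import asyncio

import openai

from outlines.models.sglang import AsyncSGLang


async def main():
    client = openai.AsyncOpenAI(
        base_url="http://localhost:30000/v1",  # placeholder SGLang endpoint
        api_key="EMPTY",
    )
    model = AsyncSGLang(client, model_name="qwen/qwen2.5-0.5b-instruct")
    print(await model.generate("Give me a short greeting."))


asyncio.run(main())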

__init__(client, model_name=None)

Parameters:

- client (required): An openai.AsyncOpenAI client instance.
- model_name (Optional[str], default None): The name of the model to use.
Source code in outlines/models/sglang.py
def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = SGLangTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using sglang.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/sglang.py
async def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using `sglang`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response = await self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The sglang server refused to answer the request: "
                f"{message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs) async

Return a text generator.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py
async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Return a text generator.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = await self.client.chat.completions.create(
        **client_args,
        stream=True,
    )

    async for chunk in stream:  # pragma: no cover
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

SGLang

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py
class SGLang(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using SGLang.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        response = self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The SGLang server refused to answer the request: "
                    f"{message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "SGLang does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using SGLang.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = self.client.chat.completions.create(
            **client_args, stream=True,
        )

        for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }

        return client_args
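
As a usage sketch (not taken from the library's own documentation), the synchronous wrapper can be combined with a Pydantic model as output_type; the server URL and model name are placeholders and Pydantic v2 is assumed.

from openai import OpenAI
from pydantic import BaseModel

from outlines.models.sglang import SGLang


class Character(BaseModel):
    name: str
    age: int


# Placeholder URL/key for a local SGLang server with an OpenAI-compatible API.
client = OpenAI(base_url="http://localhost:30000/v1", api_key="not-needed")
model = SGLang(client, model_name="default")

# The Pydantic model is converted to a JSON schema by the type adapter, so the
# server constrains the completion to match it.
result = model.generate(
    "Invent a fantasy character.", output_type=Character, max_tokens=128
)
print(Character.model_validate_json(result))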

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client

An openai.OpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/sglang.py
def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = SGLangTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using SGLang.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/sglang.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using SGLang.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    response = self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The SGLang server refused to answer the request: "
                f"{message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using SGLang.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using SGLang.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = self.client.chat.completions.create(
        **client_args, stream=True,
    )

    for chunk in stream:  # pragma: no cover
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

SGLangTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the SGLang and AsyncSGLang models.

Source code in outlines/models/sglang.py
class SGLangTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `SGLang` and `AsyncSGLang` models."""

    def format_input(self, model_input: Union[Chat, list, str]) -> list:
        """Generate the value of the messages argument to pass to the client.

        We rely on the OpenAITypeAdapter to format the input as the sglang
        server expects input in the same format as OpenAI.

        Parameters
        ----------
        model_input
            The input passed by the user.

        Returns
        -------
        list
            The formatted input to be passed to the client.

        """
        return OpenAITypeAdapter().format_input(model_input)

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the client.

        Parameters
        ----------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The formatted output type to be passed to the client.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            warnings.warn(
                "SGLang grammar-based structured outputs expects an EBNF "
                "grammar instead of a Lark grammar as is generally used in "
                "Outlines. The grammar cannot be used as a structured output "
                "type with an outlines backend, it is only compatible with "
                "the sglang and llguidance backends."
            )
            return {"extra_body": {"ebnf": term.definition}}
        elif isinstance(term, JsonSchema):
            return OpenAITypeAdapter().format_json_output_type(
                json.loads(term.schema)
            )
        else:
            return {"extra_body": {"regex": to_regex(term)}}

format_input(model_input)

Generate the value of the messages argument to pass to the client.

We rely on the OpenAITypeAdapter to format the input as the sglang server expects input in the same format as OpenAI.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The input passed by the user.

required

Returns:

Type Description
list

The formatted input to be passed to the client.

Source code in outlines/models/sglang.py
def format_input(self, model_input: Union[Chat, list, str]) -> list:
    """Generate the value of the messages argument to pass to the client.

    We rely on the OpenAITypeAdapter to format the input as the sglang
    server expects input in the same format as OpenAI.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    list
        The formatted input to be passed to the client.

    """
    return OpenAITypeAdapter().format_input(model_input)

format_output_type(output_type=None)

Generate the structured output argument to pass to the client.

Parameters:

Name Type Description Default
output_type Optional[Any]

The structured output type provided.

None

Returns:

Type Description
dict

The formatted output type to be passed to the client.

Source code in outlines/models/sglang.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the client.

    Parameters
    ----------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The formatted output type to be passed to the client.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        warnings.warn(
            "SGLang grammar-based structured outputs expects an EBNF "
            "grammar instead of a Lark grammar as is generally used in "
            "Outlines. The grammar cannot be used as a structured output "
            "type with an outlines backend, it is only compatible with "
            "the sglang and llguidance backends."
        )
        return {"extra_body": {"ebnf": term.definition}}
    elif isinstance(term, JsonSchema):
        return OpenAITypeAdapter().format_json_output_type(
            json.loads(term.schema)
        )
    else:
        return {"extra_body": {"regex": to_regex(term)}}

from_sglang(client, model_name=None)

Create a SGLang or AsyncSGLang instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name Type Description Default
client Union[OpenAI, AsyncOpenAI]

An openai.OpenAI or openai.AsyncOpenAI instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Union[SGLang, AsyncSGLang]

An Outlines SGLang or AsyncSGLang model instance.

Source code in outlines/models/sglang.py
def from_sglang(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[SGLang, AsyncSGLang]:
    """Create a `SGLang` or `AsyncSGLang` instance from an `openai.OpenAI` or
    `openai.AsyncOpenAI` instance.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[SGLang, AsyncSGLang]
        An Outlines `SGLang` or `AsyncSGLang` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    if isinstance(client, OpenAI):
        return SGLang(client, model_name)
    elif isinstance(client, AsyncOpenAI):
        return AsyncSGLang(client, model_name)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            "Please provide an OpenAI or AsyncOpenAI instance."
        )
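
A minimal dispatch sketch, assuming from_sglang is re-exported at the package root (otherwise import it from outlines.models.sglang); the server address is a placeholder.

from openai import AsyncOpenAI, OpenAI

from outlines import from_sglang

base_url = "http://localhost:30000/v1"  # placeholder SGLang server address

sync_model = from_sglang(OpenAI(base_url=base_url, api_key="not-needed"))
async_model = from_sglang(AsyncOpenAI(base_url=base_url, api_key="not-needed"))

print(type(sync_model).__name__)   # SGLang
print(type(async_model).__name__)  # AsyncSGLang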

tgi

Integration with a TGI server.

AsyncTGI

Bases: AsyncModel

Thin async wrapper around a huggingface_hub.AsyncInferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.AsyncInferenceClient client.

Source code in outlines/models/tgi.py
class AsyncTGI(AsyncModel):
    """Thin async wrapper around a `huggingface_hub.AsyncInferenceClient`
    client used to communicate with a `TGI` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the
    `huggingface_hub.AsyncInferenceClient` client.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `AsyncInferenceClient` client instance.

        """
        self.client = client
        self.type_adapter = TGITypeAdapter()

    async def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        response = await self.client.text_generation(**client_args)

        return response

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("TGI does not support batch inference.")

    async def generate_stream( # type: ignore
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = await self.client.text_generation(
            **client_args, stream=True
        )

        async for chunk in stream:  # pragma: no cover
            yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the TGI client."""
        prompt = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        client_args = {
            "prompt": prompt,
            **inference_kwargs,
        }

        return client_args
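
As an illustrative sketch, the async wrapper can be used as follows; the endpoint URL is a placeholder and max_new_tokens is simply forwarded to text_generation.

import asyncio

from huggingface_hub import AsyncInferenceClient

from outlines.models.tgi import AsyncTGI


async def main():
    # Placeholder URL of a running TGI server.
    client = AsyncInferenceClient("http://localhost:8080")
    model = AsyncTGI(client)

    # One-shot generation.
    print(await model.generate("Q: What is 2 + 2?\nA:", max_new_tokens=8))

    # Token-by-token streaming.
    async for chunk in model.generate_stream(
        "Once upon a time", max_new_tokens=32
    ):
        print(chunk, end="", flush=True)


asyncio.run(main())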

__init__(client)

Parameters:

Name Type Description Default
client

A huggingface AsyncInferenceClient client instance.

required
Source code in outlines/models/tgi.py
def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `AsyncInferenceClient` client instance.

    """
    self.client = client
    self.type_adapter = TGITypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/tgi.py
async def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response = await self.client.text_generation(**client_args)

    return response

generate_stream(model_input, output_type=None, **inference_kwargs) async

Stream text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py
async def generate_stream( # type: ignore
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = await self.client.text_generation(
        **client_args, stream=True
    )

    async for chunk in stream:  # pragma: no cover
        yield chunk

TGI

Bases: Model

Thin wrapper around a huggingface_hub.InferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.InferenceClient client.

Source code in outlines/models/tgi.py
class TGI(Model):
    """Thin wrapper around a `huggingface_hub.InferenceClient` client used to
    communicate with a `TGI` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the
    `huggingface_hub.InferenceClient` client.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `InferenceClient` client instance.

        """
        self.client = client
        self.type_adapter = TGITypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        return self.client.text_generation(**client_args)

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("TGI does not support batch inference.")

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = self.client.text_generation(
            **client_args, stream=True,
        )

        for chunk in stream:  # pragma: no cover
            yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the TGI client."""
        prompt = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        client_args = {
            "prompt": prompt,
            **inference_kwargs,
        }

        return client_args

__init__(client)

Parameters:

Name Type Description Default
client

A huggingface InferenceClient client instance.

required
Source code in outlines/models/tgi.py
def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `InferenceClient` client instance.

    """
    self.client = client
    self.type_adapter = TGITypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/tgi.py
def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    return self.client.text_generation(**client_args)

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py
def generate_stream(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = self.client.text_generation(
        **client_args, stream=True,
    )

    for chunk in stream:  # pragma: no cover
        yield chunk

TGITypeAdapter

Bases: ModelTypeAdapter

Type adapter for the TGI and AsyncTGI models.

Source code in outlines/models/tgi.py
class TGITypeAdapter(ModelTypeAdapter):
    """Type adapter for the `TGI` and `AsyncTGI` models."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the client.

        Argument
        --------
        model_input
            The input passed by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {input} is not available with TGI. "
            + "The only available type is `str`."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        return model_input

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the client.

        Argument
        --------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The structured output argument to pass to the client.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            raise NotImplementedError(
                "TGI does not support CFG-based structured outputs."
            )
        elif isinstance(term, JsonSchema):
            return {
                "grammar": {
                    "type": "json",
                    "value": json.loads(term.schema),
                }
            }
        else:
            return {
                "grammar": {
                    "type": "regex",
                    "value": to_regex(term),
                }
            }

format_input(model_input)

Generate the prompt argument to pass to the client.

Argument

model_input The input passed by the user.

Returns:

Type Description
str

The formatted input to be passed to the model.

Source code in outlines/models/tgi.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the client.

    Argument
    --------
    model_input
        The input passed by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {input} is not available with TGI. "
        + "The only available type is `str`."
    )

format_output_type(output_type=None)

Generate the structured output argument to pass to the client.

Argument

output_type The structured output type provided.

Returns:

Type Description
dict

The structured output argument to pass to the client.

Source code in outlines/models/tgi.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the client.

    Argument
    --------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The structured output argument to pass to the client.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        raise NotImplementedError(
            "TGI does not support CFG-based structured outputs."
        )
    elif isinstance(term, JsonSchema):
        return {
            "grammar": {
                "type": "json",
                "value": json.loads(term.schema),
            }
        }
    else:
        return {
            "grammar": {
                "type": "regex",
                "value": to_regex(term),
            }
        }
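
For concreteness, a sketch of the grammar payloads the adapter builds; the exact schema and regex values come from outlines' conversion helpers and are indicative only.

from pydantic import BaseModel

from outlines.models.tgi import TGITypeAdapter


class Answer(BaseModel):
    value: int


adapter = TGITypeAdapter()

# A Pydantic model becomes {"grammar": {"type": "json", "value": {...}}}, while
# a regex-convertible type such as `int` becomes
# {"grammar": {"type": "regex", "value": "..."}}.
print(adapter.format_output_type(Answer))
print(adapter.format_output_type(int))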

from_tgi(client)

Create an Outlines TGI or AsyncTGI model instance from a huggingface_hub.InferenceClient or huggingface_hub.AsyncInferenceClient instance.

Parameters:

Name Type Description Default
client Union[InferenceClient, AsyncInferenceClient]

A huggingface_hub.InferenceClient or huggingface_hub.AsyncInferenceClient instance.

required

Returns:

Type Description
Union[TGI, AsyncTGI]

An Outlines TGI or AsyncTGI model instance.

Source code in outlines/models/tgi.py
def from_tgi(
    client: Union["InferenceClient", "AsyncInferenceClient"],
) -> Union[TGI, AsyncTGI]:
    """Create an Outlines `TGI` or `AsyncTGI` model instance from an
    `huggingface_hub.InferenceClient` or `huggingface_hub.AsyncInferenceClient`
    instance.

    Parameters
    ----------
    client
        A `huggingface_hub.InferenceClient` or
        `huggingface_hub.AsyncInferenceClient` instance.

    Returns
    -------
    Union[TGI, AsyncTGI]
        An Outlines `TGI` or `AsyncTGI` model instance.

    """
    from huggingface_hub import AsyncInferenceClient, InferenceClient

    if isinstance(client, InferenceClient):
        return TGI(client)
    elif isinstance(client, AsyncInferenceClient):
        return AsyncTGI(client)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            + "Please provide an HuggingFace InferenceClient "
            + "or AsyncInferenceClient instance."
        )
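
A minimal sketch, assuming from_tgi is re-exported at the package root (otherwise import it from outlines.models.tgi); the endpoint URL is a placeholder.

from huggingface_hub import AsyncInferenceClient, InferenceClient

from outlines import from_tgi

endpoint = "http://localhost:8080"  # placeholder TGI server address

sync_model = from_tgi(InferenceClient(endpoint))
async_model = from_tgi(AsyncInferenceClient(endpoint))

print(type(sync_model).__name__)   # TGI
print(type(async_model).__name__)  # AsyncTGI

print(sync_model.generate("The capital of France is", max_new_tokens=4))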

tokenizer

Tokenizer

Bases: Hashable, Protocol

Source code in outlines/models/tokenizer.py
class Tokenizer(Hashable, Protocol):
    eos_token: str
    eos_token_id: int
    pad_token_id: int
    vocabulary: Dict[str, int]
    special_tokens: Set[str]

    def encode(
        self, prompt: Union[str, List[str]]
    ) -> "Tuple['NDArray[np.int64]', 'NDArray[np.int64]']":
        """Translate the input prompts into arrays of token ids and attention mask."""
        ...

    def decode(self, token_ids: "NDArray[np.int64]") -> List[str]:
        """Translate an array of token ids to a string or list of strings."""
        ...

    def convert_token_to_string(self, token: str) -> str:
        """Convert a token to its equivalent string.

        This is for instance useful for BPE tokenizers where whitespaces are
        represented by the special character `Ġ`. This prevents matching a raw
        token that includes `Ġ` with a string.
        """
        ...
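
To illustrate what the protocol expects, here is a toy whitespace tokenizer that structurally satisfies its members. It is not part of the library; the left-padding behaviour and pad-token handling are assumptions made for the example.

from typing import Dict, List, Set, Tuple, Union

import numpy as np
from numpy.typing import NDArray


class WhitespaceTokenizer:
    """Toy tokenizer that structurally satisfies the `Tokenizer` protocol."""

    def __init__(self, vocabulary: Dict[str, int]):
        self.vocabulary = vocabulary
        self.special_tokens: Set[str] = {"<eos>"}
        self.eos_token = "<eos>"
        self.eos_token_id = vocabulary["<eos>"]
        self.pad_token_id = self.eos_token_id
        self._inverse = {i: t for t, i in vocabulary.items()}

    def encode(
        self, prompt: Union[str, List[str]]
    ) -> Tuple[NDArray[np.int64], NDArray[np.int64]]:
        prompts = [prompt] if isinstance(prompt, str) else prompt
        rows = [[self.vocabulary[t] for t in p.split()] for p in prompts]
        width = max(len(row) for row in rows)
        token_ids = np.full((len(rows), width), self.pad_token_id, dtype=np.int64)
        mask = np.zeros_like(token_ids)
        for i, row in enumerate(rows):
            token_ids[i, width - len(row):] = row  # left padding
            mask[i, width - len(row):] = 1
        return token_ids, mask

    def decode(self, token_ids: NDArray[np.int64]) -> List[str]:
        # Naive: padding tokens are not stripped before joining.
        return [
            " ".join(self._inverse[int(i)] for i in row) for row in token_ids
        ]

    def convert_token_to_string(self, token: str) -> str:
        return token

    def __hash__(self):
        return hash(tuple(sorted(self.vocabulary.items())))


vocab = {"hello": 0, "world": 1, "<eos>": 2}
tokenizer = WhitespaceTokenizer(vocab)
ids, mask = tokenizer.encode(["hello world", "hello"])
print(ids, mask, tokenizer.decode(ids), sep="\n")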

convert_token_to_string(token)

Convert a token to its equivalent string.

This is for instance useful for BPE tokenizers where whitespaces are represented by the special character Ġ. This prevents matching a raw token that includes Ġ with a string.

Source code in outlines/models/tokenizer.py
def convert_token_to_string(self, token: str) -> str:
    """Convert a token to its equivalent string.

    This is for instance useful for BPE tokenizers where whitespaces are
    represented by the special character `Ġ`. This prevents matching a raw
    token that includes `Ġ` with a string.
    """
    ...

decode(token_ids)

Translate an array of token ids to a string or list of strings.

Source code in outlines/models/tokenizer.py
def decode(self, token_ids: "NDArray[np.int64]") -> List[str]:
    """Translate an array of token ids to a string or list of strings."""
    ...

encode(prompt)

Translate the input prompts into arrays of token ids and attention mask.

Source code in outlines/models/tokenizer.py
def encode(
    self, prompt: Union[str, List[str]]
) -> "Tuple['NDArray[np.int64]', 'NDArray[np.int64]']":
    """Translate the input prompts into arrays of token ids and attention mask."""
    ...

transformers

Integration with the transformers library.

TransformerTokenizer

Bases: Tokenizer

Represents a tokenizer for models in the transformers library.

Source code in outlines/models/transformers.py
class TransformerTokenizer(Tokenizer):
    """Represents a tokenizer for models in the `transformers` library."""

    def __init__(self, tokenizer: "PreTrainedTokenizer", **kwargs):
        self.tokenizer = tokenizer
        self.eos_token_id = self.tokenizer.eos_token_id
        self.eos_token = self.tokenizer.eos_token

        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
            self.pad_token_id = self.eos_token_id
        else:
            self.pad_token_id = self.tokenizer.pad_token_id
            self.pad_token = self.tokenizer.pad_token

        self.special_tokens = set(self.tokenizer.all_special_tokens)

        self.vocabulary = self.tokenizer.get_vocab()
        self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types())

    def encode(
        self, prompt: Union[str, List[str]], **kwargs
    ) -> Tuple["torch.LongTensor", "torch.LongTensor"]:
        kwargs["padding"] = True
        kwargs["return_tensors"] = "pt"
        output = self.tokenizer(prompt, **kwargs)
        return output["input_ids"], output["attention_mask"]

    def decode(self, token_ids: "torch.LongTensor") -> List[str]:
        text = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        return text

    def convert_token_to_string(self, token: str) -> str:
        from transformers.file_utils import SPIECE_UNDERLINE

        string = self.tokenizer.convert_tokens_to_string([token])

        if self.is_llama:
            # A hack to handle missing spaces in HF's Llama tokenizers
            if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
                return " " + string

        return string

    def __eq__(self, other):
        if isinstance(other, type(self)):
            if hasattr(self, "model_name") and hasattr(self, "kwargs"):
                return (
                    other.model_name == self.model_name and other.kwargs == self.kwargs
                )
            else:
                return other.tokenizer == self.tokenizer
        return NotImplemented

    def __hash__(self):
        from datasets.fingerprint import Hasher

        return hash(Hasher.hash(self.tokenizer))

    def __getstate__(self):
        state = {"tokenizer": self.tokenizer}
        return state

    def __setstate__(self, state):
        self.__init__(state["tokenizer"])

Transformers

Bases: Model

Thin wrapper around a transformers model and a transformers tokenizer.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and tokenizer.

Source code in outlines/models/transformers.py
class Transformers(Model):
    """Thin wrapper around a `transformers` model and a `transformers`
    tokenizer.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    tokenizer.

    """

    def __init__(
        self,
        model: "PreTrainedModel",
        tokenizer: "PreTrainedTokenizer",
    ):
        """
        Parameters
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        tokenizer
            A `PreTrainedTokenizer`, or any tokenizer that is compatible with
            the `transformers` API for tokenizers.

        """
        # We need to handle the cases in which jax/flax or tensorflow
        # is not available in the environment.
        try:
            from transformers import FlaxPreTrainedModel
        except ImportError:  # pragma: no cover
            FlaxPreTrainedModel = None

        try:
            from transformers import TFPreTrainedModel
        except ImportError:  # pragma: no cover
            TFPreTrainedModel = None

        tokenizer.padding_side = "left"
        self.model = model
        self.tokenizer = TransformerTokenizer(tokenizer)
        self.type_adapter = TransformersTypeAdapter(tokenizer=tokenizer)

        if (
            FlaxPreTrainedModel is not None
            and isinstance(model, FlaxPreTrainedModel)
        ):
            self.tensor_library_name = "jax"
        elif (
            TFPreTrainedModel is not None
            and isinstance(model, TFPreTrainedModel)
        ):
            self.tensor_library_name = "tensorflow"
        else:
            self.tensor_library_name = "torch"

    def _prepare_model_inputs(
        self,
        model_input,
        is_batch: bool = False,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        # Format validation
        if is_batch:
            prompts = [
                self.type_adapter.format_input(item)
                for item in model_input
            ]
        else:
            prompts = self.type_adapter.format_input(model_input)
        input_ids, attention_mask = self.tokenizer.encode(prompts)
        inputs = {
            "input_ids": input_ids.to(self.model.device),
            "attention_mask": attention_mask.to(self.model.device),
        }

        return prompts, inputs

    def generate(
        self,
        model_input: Union[str, dict, Chat],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str]]:
        """Generate text using `transformers`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response. For
            multi-modal models, the input should be a dictionary containing the
            `text` key with a value of type `Union[str, List[str]]` and the
            other keys required by the model.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            of the `transformers` model.

        Returns
        -------
        Union[str, List[str]]
            The text generated by the model.

        """
        prompts, inputs = self._prepare_model_inputs(model_input, False)
        logits_processor = self.type_adapter.format_output_type(output_type)

        generated_ids = self._generate_output_seq(
            prompts,
            inputs,
            logits_processor=logits_processor,
            **inference_kwargs,
        )

        # required for multi-modal models that return a 2D tensor even when
        # num_return_sequences is 1
        num_samples = inference_kwargs.get("num_return_sequences", 1)
        if num_samples == 1 and len(generated_ids.shape) == 2:
            generated_ids = generated_ids.squeeze(0)

        return self._decode_generation(generated_ids)

    def generate_batch(
        self,
        model_input: List[Union[str, dict, Chat]],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> List[Union[str, List[str]]]:
        """"""
        prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore
        logits_processor = self.type_adapter.format_output_type(output_type)

        generated_ids = self._generate_output_seq(
            prompts, inputs, logits_processor=logits_processor, **inference_kwargs
        )

        # if there are multiple samples per input, convert generated_id to 3D
        num_samples = inference_kwargs.get("num_return_sequences", 1)
        if num_samples > 1:
            generated_ids = generated_ids.view(len(model_input), num_samples, -1)

        return self._decode_generation(generated_ids)

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `transformers` models.

        TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

        """
        raise NotImplementedError(
            "Streaming is not implemented for Transformers models."
        )

    def _generate_output_seq(self, prompts, inputs, **inference_kwargs):
        input_ids = inputs["input_ids"]

        output_ids = self.model.generate(
            **inputs,
            **inference_kwargs,
        )

        # encoder-decoder returns output_ids only, decoder-only returns full seq ids
        if self.model.config.is_encoder_decoder:
            generated_ids = output_ids
        else:
            generated_ids = output_ids[:, input_ids.shape[1] :]

        return generated_ids

    def _decode_generation(self, generated_ids: "torch.Tensor"):
        if len(generated_ids.shape) == 1:
            return self.tokenizer.decode([generated_ids])[0]
        elif len(generated_ids.shape) == 2:
            return self.tokenizer.decode(generated_ids)
        elif len(generated_ids.shape) == 3:
            return [
                self.tokenizer.decode(generated_ids[i])
                for i in range(len(generated_ids))
            ]
        else:  # pragma: no cover
            raise TypeError(
                "Generated outputs aren't 1D, 2D or 3D, but instead are "
                f"{generated_ids.shape}"
            )
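
As a usage sketch: "gpt2" below is only an example checkpoint, and generation settings are whatever transformers defaults to.

from transformers import AutoModelForCausalLM, AutoTokenizer

from outlines.models.transformers import Transformers

# "gpt2" is just an example; any causal LM compatible with the `transformers`
# API for models and tokenizers can be used instead.
hf_model = AutoModelForCausalLM.from_pretrained("gpt2")
hf_tokenizer = AutoTokenizer.from_pretrained("gpt2")

model = Transformers(hf_model, hf_tokenizer)

# `inference_kwargs` are forwarded to `hf_model.generate`.
print(model.generate("The capital of France is", max_new_tokens=5))

# Batched generation returns one completion per prompt.
print(model.generate_batch(["1 + 1 =", "2 + 2 ="], max_new_tokens=3))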

__init__(model, tokenizer)

Parameters:

Name Type Description Default
model PreTrainedModel

A PreTrainedModel, or any model that is compatible with the transformers API for models.

required
tokenizer PreTrainedTokenizer

A PreTrainedTokenizer, or any tokenizer that is compatible with the transformers API for tokenizers.

required

Source code in outlines/models/transformers.py
def __init__(
    self,
    model: "PreTrainedModel",
    tokenizer: "PreTrainedTokenizer",
):
    """
    Parameters
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    tokenizer
        A `PreTrainedTokenizer`, or any tokenizer that is compatible with
        the `transformers` API for tokenizers.

    """
    # We need to handle the cases in which jax/flax or tensorflow
    # is not available in the environment.
    try:
        from transformers import FlaxPreTrainedModel
    except ImportError:  # pragma: no cover
        FlaxPreTrainedModel = None

    try:
        from transformers import TFPreTrainedModel
    except ImportError:  # pragma: no cover
        TFPreTrainedModel = None

    tokenizer.padding_side = "left"
    self.model = model
    self.tokenizer = TransformerTokenizer(tokenizer)
    self.type_adapter = TransformersTypeAdapter(tokenizer=tokenizer)

    if (
        FlaxPreTrainedModel is not None
        and isinstance(model, FlaxPreTrainedModel)
    ):
        self.tensor_library_name = "jax"
    elif (
        TFPreTrainedModel is not None
        and isinstance(model, TFPreTrainedModel)
    ):
        self.tensor_library_name = "tensorflow"
    else:
        self.tensor_library_name = "torch"

generate(model_input, output_type=None, **inference_kwargs)

Generate text using transformers.

Parameters:

Name Type Description Default
model_input Union[str, dict, Chat]

The prompt based on which the model will generate a response. For multi-modal models, the input should be a dictionary containing the text key with a value of type Union[str, List[str]] and the other keys required by the model.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
inference_kwargs Any

Additional keyword arguments to pass to the generate method of the transformers model.

{}

Returns:

Type Description
Union[str, List[str]]

The text generated by the model.

Source code in outlines/models/transformers.py
def generate(
    self,
    model_input: Union[str, dict, Chat],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Generate text using `transformers`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response. For
        multi-modal models, the input should be a dictionary containing the
        `text` key with a value of type `Union[str, List[str]]` and the
        other keys required by the model.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        of the `transformers` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.

    """
    prompts, inputs = self._prepare_model_inputs(model_input, False)
    logits_processor = self.type_adapter.format_output_type(output_type)

    generated_ids = self._generate_output_seq(
        prompts,
        inputs,
        logits_processor=logits_processor,
        **inference_kwargs,
    )

    # required for multi-modal models that return a 2D tensor even when
    # num_return_sequences is 1
    num_samples = inference_kwargs.get("num_return_sequences", 1)
    if num_samples == 1 and len(generated_ids.shape) == 2:
        generated_ids = generated_ids.squeeze(0)

    return self._decode_generation(generated_ids)

generate_batch(model_input, output_type=None, **inference_kwargs)

Generate text for a batch of prompts using transformers.

Source code in outlines/models/transformers.py
def generate_batch(
    self,
    model_input: List[Union[str, dict, Chat]],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> List[Union[str, List[str]]]:
    """"""
    prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore
    logits_processor = self.type_adapter.format_output_type(output_type)

    generated_ids = self._generate_output_seq(
        prompts, inputs, logits_processor=logits_processor, **inference_kwargs
    )

    # if there are multiple samples per input, convert generated_id to 3D
    num_samples = inference_kwargs.get("num_return_sequences", 1)
    if num_samples > 1:
        generated_ids = generated_ids.view(len(model_input), num_samples, -1)

    return self._decode_generation(generated_ids)

generate_stream(model_input, output_type, **inference_kwargs)

Not available for transformers models.

TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

Source code in outlines/models/transformers.py
def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Not available for `transformers` models.

    TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

    """
    raise NotImplementedError(
        "Streaming is not implemented for Transformers models."
    )

TransformersMultiModal

Bases: Transformers

Thin wrapper around a transformers model and a transformers processor.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and processor.

Source code in outlines/models/transformers.py
class TransformersMultiModal(Transformers):
    """Thin wrapper around a `transformers` model and a `transformers`
    processor.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    processor.

    """

    def __init__(self, model: "PreTrainedModel", processor):
        """Create a TransformersMultiModal model instance

        We rely on the `__init__` method of the `Transformers` class to handle
        most of the initialization and then add elements specific to multimodal
        models.

        Parameters
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        processor
            A `ProcessorMixin` instance.

        """
        self.processor = processor
        self.processor.padding_side = "left"
        self.processor.pad_token = "[PAD]"

        tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer

        super().__init__(model, tokenizer)

        self.type_adapter = TransformersMultiModalTypeAdapter(
            tokenizer=tokenizer
        )

    def _prepare_model_inputs(
        self,
        model_input,
        is_batch: bool = False,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        if is_batch:
            prompts = [
                self.type_adapter.format_input(item) for item in model_input
            ]
        else:
            prompts = self.type_adapter.format_input(model_input)

        # The expected format is a single dict
        if is_batch:
            merged_prompts = defaultdict(list)
            for d in prompts:
                for key, value in d.items():
                    if key == "text":
                        merged_prompts[key].append(value)
                    else:
                        merged_prompts[key].extend(value)
        else:
            merged_prompts = prompts # type: ignore

        inputs = self.processor(
            **merged_prompts, padding=True, return_tensors="pt"
        ).to(self.model.device)

        return merged_prompts["text"], inputs
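
A minimal usage sketch, assuming a vision-language checkpoint that ships a processor. The model name, the image file and the "<image>" placeholder are assumptions; the exact placeholder expected in the prompt depends on the processor.

from PIL import Image as PILImage
from transformers import AutoModelForVision2Seq, AutoProcessor

from outlines import Image, from_transformers

MODEL_ID = "HuggingFaceTB/SmolVLM-Instruct"  # assumed checkpoint

# `from_transformers` returns a TransformersMultiModal instance when given a processor.
model = from_transformers(
    AutoModelForVision2Seq.from_pretrained(MODEL_ID),
    AutoProcessor.from_pretrained(MODEL_ID),
)

image = PILImage.open("my_image.png")  # assumed local file
response = model(
    ["<image> Describe this picture in one sentence.", Image(image)],
    max_new_tokens=64,
)
print(response)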

__init__(model, processor)

Create a TransformersMultiModal model instance

We rely on the __init__ method of the Transformers class to handle most of the initialization and then add elements specific to multimodal models.

Parameters:

Name Type Description Default
model PreTrainedModel

A PreTrainedModel, or any model that is compatible with the transformers API for models.

required
processor

A ProcessorMixin instance.

required
Source code in outlines/models/transformers.py
def __init__(self, model: "PreTrainedModel", processor):
    """Create a TransformersMultiModal model instance

    We rely on the `__init__` method of the `Transformers` class to handle
    most of the initialization and then add elements specific to multimodal
    models.

    Parameters
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    processor
        A `ProcessorMixin` instance.

    """
    self.processor = processor
    self.processor.padding_side = "left"
    self.processor.pad_token = "[PAD]"

    tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer

    super().__init__(model, tokenizer)

    self.type_adapter = TransformersMultiModalTypeAdapter(
        tokenizer=tokenizer
    )

TransformersMultiModalTypeAdapter

Bases: ModelTypeAdapter

Type adapter for TransformersMultiModal model.

Source code in outlines/models/transformers.py
class TransformersMultiModalTypeAdapter(ModelTypeAdapter):
    """Type adapter for `TransformersMultiModal` model."""

    def __init__(self, **kwargs):
        self.tokenizer = kwargs.get("tokenizer")

    @singledispatchmethod
    def format_input(self, model_input):
        """Fomat the prompt arguments to pass to the model.

        Argument
        --------
        model_input
            The input passed by the user.

        Returns
        -------
        dict
            The formatted input.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available. Please "
            + "provide a list containing a text prompt and assets "
            + "(`Image`, `Audio` or `Video` instances) supported by your "
            + "model or a `Chat` instance."
        )

    @format_input.register(dict)
    def format_dict_input(self, model_input: dict) -> dict:
        warnings.warn("""
            Providing the input as a dict is deprecated. Support for this will
            be removed in the v1.2.0 release of Outlines. Use a list containing
            a text prompt and assets (`Image`, `Audio` or `Video` instances)
            instead.
            For instance:
            ```python
            from outlines import Image
            model = from_transformers(mymodel, myprocessor)
            response = model([
                "A beautiful image of a cat",
                Image(my_image),
            ])
            ```
            """,
            DeprecationWarning,
            stacklevel=2,
        )
        if "text" not in model_input:
            raise ValueError(
                "The input must contain the 'text' key along with the other "
                + "keys required by your processor."
            )
        return model_input

    @format_input.register(Chat)
    def format_chat_input(self, model_input: Chat) -> dict:
        # we need to separate the images from the messages
        # to apply the chat template to the messages without images
        messages = model_input.messages
        images = []
        messages_without_images = []
        for message in messages:
            if isinstance(message["content"], list):
                images.extend(message["content"][1:])
                messages_without_images.append({
                    "role": message["role"],
                    "content": message["content"][0],
                })
            else:
                messages_without_images.append(message)
        formatted_prompt = self.tokenizer.apply_chat_template(
            messages_without_images,
            tokenize=False
        )
        # use the formatted prompt and the images to format the input
        return self.format_list_input([formatted_prompt, *images])

    @format_input.register(list)
    def format_list_input(self, model_input: list) -> dict:
        prompt = model_input[0]
        assets = model_input[1:]

        asset_types = set(type(asset) for asset in assets)
        if len(asset_types) > 1:
            raise ValueError(
                "All assets must be of the same type. "
                + f"Found types: {asset_types}"
            )
        asset_type = asset_types.pop()

        if asset_type == Image:
            return {
                "text": prompt,
                "images": [asset.image for asset in assets]
            }
        elif asset_type == Audio: # pragma: no cover
            return {
                "text": prompt,
                "audio": [asset.audio for asset in assets]
            }
        elif asset_type == Video: # pragma: no cover
            return {
                "text": prompt,
                "videos": [asset.video for asset in assets]
            }
        else:
            raise ValueError(f"Unsupported asset type: {asset_type}")

    def format_output_type(
        self,
        output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional["LogitsProcessorList"]:
        """Generate the logits processor argument to pass to the model.

        Argument
        --------
        output_type
            The logits processor provided.

        Returns
        -------
        Optional[LogitsProcessorList]
            The logits processor to pass to the model.

        """
        from transformers import LogitsProcessorList

        if output_type is not None:
            return LogitsProcessorList([output_type])
        return None

format_input(model_input)

Format the prompt arguments to pass to the model.

Argument

model_input The input passed by the user.

Returns:

Type Description
dict

The formatted input.

Source code in outlines/models/transformers.py
@singledispatchmethod
def format_input(self, model_input):
    """Fomat the prompt arguments to pass to the model.

    Argument
    --------
    model_input
        The input passed by the user.

    Returns
    -------
    dict
        The formatted input.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available. Please "
        + "provide a list containing a text prompt and assets "
        + "(`Image`, `Audio` or `Video` instances) supported by your "
        + "model or a `Chat` instance."
    )

format_output_type(output_type=None)

Generate the logits processor argument to pass to the model.

Argument

output_type The logits processor provided.

Returns:

Type Description
Optional[LogitsProcessorList]

The logits processor to pass to the model.

Source code in outlines/models/transformers.py
def format_output_type(
    self,
    output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional["LogitsProcessorList"]:
    """Generate the logits processor argument to pass to the model.

    Argument
    --------
    output_type
        The logits processor provided.

    Returns
    -------
    Optional[LogitsProcessorList]
        The logits processor to pass to the model.

    """
    from transformers import LogitsProcessorList

    if output_type is not None:
        return LogitsProcessorList([output_type])
    return None

TransformersTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the Transformers model.

Source code in outlines/models/transformers.py
class TransformersTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Transformers` model."""

    def __init__(self, **kwargs):
        self.tokenizer = kwargs.get("tokenizer")

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input passed by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available."
            "The only available types are `str` and `Chat`."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        return model_input

    @format_input.register(Chat)
    def format_chat_input(self, model_input: Chat) -> str:
        return self.tokenizer.apply_chat_template(
            model_input.messages,
            tokenize=False,
            add_generation_prompt=True,
        )

    def format_output_type(
        self,
        output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional["LogitsProcessorList"]:
        """Generate the logits processor argument to pass to the model.

        Parameters
        ----------
        output_type
            The logits processor provided.

        Returns
        -------
        Optional[LogitsProcessorList]
            The logits processor to pass to the model.

        """
        from transformers import LogitsProcessorList

        if output_type is not None:
            return LogitsProcessorList([output_type])
        return None

format_input(model_input)

Generate the prompt argument to pass to the model.

Parameters:

Name Type Description Default
model_input

The input passed by the user.

required

Returns:

Type Description
str

The formatted input to be passed to the model.

Source code in outlines/models/transformers.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available."
        "The only available types are `str` and `Chat`."
    )

format_output_type(output_type=None)

Generate the logits processor argument to pass to the model.

Parameters:

Name Type Description Default
output_type Optional[OutlinesLogitsProcessor]

The logits processor provided.

None

Returns:

Type Description
Optional[LogitsProcessorList]

The logits processor to pass to the model.

Source code in outlines/models/transformers.py
def format_output_type(
    self,
    output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional["LogitsProcessorList"]:
    """Generate the logits processor argument to pass to the model.

    Parameters
    ----------
    output_type
        The logits processor provided.

    Returns
    -------
    Optional[LogitsProcessorList]
        The logits processor to pass to the model.

    """
    from transformers import LogitsProcessorList

    if output_type is not None:
        return LogitsProcessorList([output_type])
    return None

from_transformers(model, tokenizer_or_processor)

Create an Outlines Transformers or TransformersMultiModal model instance from a PreTrainedModel instance and a PreTrainedTokenizer or ProcessorMixin instance.

outlines supports PreTrainedModelForCausalLM, PreTrainedMambaForCausalLM, PreTrainedModelForSeq2Seq and any model that implements the transformers model API.

Parameters:

Name Type Description Default
model PreTrainedModel

A transformers.PreTrainedModel instance.

required
tokenizer_or_processor Union[PreTrainedTokenizer, ProcessorMixin]

A transformers.PreTrainedTokenizer or transformers.ProcessorMixin instance.

required

Returns:

Type Description
Union[Transformers, TransformersMultiModal]

An Outlines Transformers or TransformersMultiModal model instance.

Source code in outlines/models/transformers.py
def from_transformers(
    model: "PreTrainedModel",
    tokenizer_or_processor: Union["PreTrainedTokenizer", "ProcessorMixin"],
) -> Union[Transformers, TransformersMultiModal]:
    """Create an Outlines `Transformers` or `TransformersMultiModal` model
    instance from a `PreTrainedModel` instance and a `PreTrainedTokenizer` or
    `ProcessorMixin` instance.

    `outlines` supports `PreTrainedModelForCausalLM`,
    `PreTrainedMambaForCausalLM`, `PreTrainedModelForSeq2Seq` and any model
    that implements the `transformers` model API.

    Parameters
    ----------
    model
        A `transformers.PreTrainedModel` instance.
    tokenizer_or_processor
        A `transformers.PreTrainedTokenizer` or
        `transformers.ProcessorMixin` instance.

    Returns
    -------
    Union[Transformers, TransformersMultiModal]
        An Outlines `Transformers` or `TransformersMultiModal` model instance.

    """
    from transformers import (
        PreTrainedTokenizer, PreTrainedTokenizerFast, ProcessorMixin)

    if isinstance(
        tokenizer_or_processor, (PreTrainedTokenizer, PreTrainedTokenizerFast)
    ):
        tokenizer = tokenizer_or_processor
        return Transformers(model, tokenizer)
    elif isinstance(tokenizer_or_processor, ProcessorMixin):
        processor = tokenizer_or_processor
        return TransformersMultiModal(model, processor)
    else:
        raise ValueError(
            "We could determine whether the model passed to `from_transformers`"
            + " is a text-2-text or a multi-modal model. Please provide a "
            + "a transformers tokenizer or processor."
        )
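
A minimal usage sketch for the text-only case, with an assumed small instruct checkpoint. Passing an output type (here a Literal) routes through the logits-processor machinery of the Transformers model.

from typing import Literal

from transformers import AutoModelForCausalLM, AutoTokenizer

from outlines import from_transformers

MODEL_ID = "HuggingFaceTB/SmolLM2-135M-Instruct"  # assumed checkpoint

model = from_transformers(
    AutoModelForCausalLM.from_pretrained(MODEL_ID),
    AutoTokenizer.from_pretrained(MODEL_ID),
)

# Unconstrained generation.
print(model("The Eiffel Tower is located in", max_new_tokens=10))

# Constrained generation: the answer is guaranteed to be one of the two literals.
print(model("Is Paris in France? Answer yes or no: ", Literal["yes", "no"]))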

get_llama_tokenizer_types()

Get all the Llama tokenizer types/classes that need work-arounds.

When they can't be imported, a dummy class is created.

Source code in outlines/models/transformers.py
def get_llama_tokenizer_types():
    """Get all the Llama tokenizer types/classes that need work-arounds.

    When they can't be imported, a dummy class is created.

    """
    try:
        from transformers.models.llama import LlamaTokenizer
    except ImportError:  # pragma: no cover

        class LlamaTokenizer:  # type: ignore
            pass

    try:
        from transformers.models.llama import LlamaTokenizerFast
    except ImportError:  # pragma: no cover

        class LlamaTokenizerFast:  # type: ignore
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizer
    except ImportError:  # pragma: no cover

        class CodeLlamaTokenizer:  # type: ignore
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizerFast
    except ImportError:  # pragma: no cover

        class CodeLlamaTokenizerFast:  # type: ignore
            pass

    return (
        LlamaTokenizer,
        LlamaTokenizerFast,
        CodeLlamaTokenizer,
        CodeLlamaTokenizerFast,
    )

utils

set_additional_properties_false_json_schema(schema)

Set additionalProperties to False for all objects in the schema using jsonpath.

Parameters:

Name Type Description Default
schema dict

The JSON schema to modify

required

Returns:

Type Description
dict

The modified schema with additionalProperties set to False

Source code in outlines/models/utils.py
def set_additional_properties_false_json_schema(schema: dict) -> dict:
    """Set additionalProperties to False to all objects in the schema using jsonpath.

    Parameters
    ----------
    schema
        The JSON schema to modify

    Returns
    -------
    dict
        The modified schema with additionalProperties set to False
    """
    # Get all nodes
    jsonpath_expr = jsonpath_ng.parse('$..*')
    matches = jsonpath_expr.find(schema)

    # Go over all nodes and set additionalProperties to False if it's an object
    for match in matches:
        if match.value == 'object':
            if 'additionalProperties' not in match.context.value:
                match.context.value['additionalProperties'] = False

    return schema
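
A short sketch of the effect: every dict whose "type" value is "object" gains "additionalProperties": False, and the schema is modified in place (the returned dict is the same object that was passed in).

from outlines.models.utils import set_additional_properties_false_json_schema

schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "address": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
        },
    },
}

result = set_additional_properties_false_json_schema(schema)

# Both the root object and the nested "address" object are now closed.
assert result["additionalProperties"] is False
assert result["properties"]["address"]["additionalProperties"] is False
assert result is schema  # modified in place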

vllm

Integration with a vLLM server.

AsyncVLLM

Bases: AsyncModel

Thin async wrapper around the openai.AsyncOpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.AsyncOpenAI client for the vllm server.

Source code in outlines/models/vllm.py
class AsyncVLLM(AsyncModel):
    """Thin async wrapper around the `openai.OpenAI` client used to communicate
    with a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    `vllm` server.
    """

    def __init__(
        self,
        client: "AsyncOpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        response = await self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("VLLM does not support batch inference.")

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.
        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )

        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args
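
A minimal async sketch. It assumes a vLLM server exposing an OpenAI-compatible endpoint at the placeholder URL, a placeholder model name, that from_vllm is re-exported at the top level of the outlines package, and that the server's structured generation backend supports JSON schemas.

import asyncio

from openai import AsyncOpenAI
from pydantic import BaseModel

from outlines import from_vllm


class Character(BaseModel):
    name: str
    age: int


async def main():
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
    model = from_vllm(client, "Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model name

    # The Pydantic class is turned into a `guided_json` entry in `extra_body`.
    raw = await model.generate(
        "Invent a fictional character and return it as JSON.",
        Character,
        max_tokens=200,
    )
    print(Character.model_validate_json(raw))


asyncio.run(main())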

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client AsyncOpenAI

An openai.AsyncOpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/vllm.py
def __init__(
    self,
    client: "AsyncOpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using vLLM.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/vllm.py
async def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response = await self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The vLLM server refused to answer the request: "
                f"{message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs) async

Stream text using vLLM.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py
async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.
    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = await self.client.chat.completions.create(
        **client_args,
        stream=True,
    )

    async for chunk in stream:  # pragma: no cover
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

VLLM

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the vllm server.

Source code in outlines/models/vllm.py
class VLLM(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    `vllm` server.
    """

    def __init__(
        self,
        client: "OpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        response = self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("VLLM does not support batch inference.")

    def generate_stream(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = self.client.chat.completions.create(
            **client_args, stream=True,
        )

        for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client OpenAI

An openai.OpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/vllm.py
def __init__(
    self,
    client: "OpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using vLLM.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/vllm.py
def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    response = self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The vLLM server refused to answer the request: "
                f"{message.refusal}"
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using vLLM.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py
def generate_stream(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = self.client.chat.completions.create(
        **client_args, stream=True,
    )

    for chunk in stream:  # pragma: no cover
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

VLLMTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the VLLM and AsyncVLLM models.

Source code in outlines/models/vllm.py
class VLLMTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `VLLM` and `AsyncVLLM` models."""

    def format_input(self, model_input: Union[Chat, str, list]) -> list:
        """Generate the value of the messages argument to pass to the client.

        We rely on the OpenAITypeAdapter to format the input as the vLLM server
        expects input in the same format as OpenAI.

        Parameters
        ----------
        model_input
            The input passed by the user.

        Returns
        -------
        list
            The formatted input to be passed to the model.

        """
        return OpenAITypeAdapter().format_input(model_input)

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the client.

        Parameters
        ----------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The structured output argument to pass to the model.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            return {"guided_grammar": term.definition}
        elif isinstance(term, JsonSchema):
            extra_body = {"guided_json": json.loads(term.schema)}
            if term.whitespace_pattern:
                extra_body["whitespace_pattern"] = term.whitespace_pattern
            return extra_body
        else:
            return {"guided_regex": to_regex(term)}

format_input(model_input)

Generate the value of the messages argument to pass to the client.

We rely on the OpenAITypeAdapter to format the input as the vLLM server expects input in the same format as OpenAI.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The input passed by the user.

required

Returns:

Type Description
list

The formatted input to be passed to the model.

Source code in outlines/models/vllm.py
def format_input(self, model_input: Union[Chat, str, list]) -> list:
    """Generate the value of the messages argument to pass to the client.

    We rely on the OpenAITypeAdapter to format the input as the vLLM server
    expects input in the same format as OpenAI.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    list
        The formatted input to be passed to the model.

    """
    return OpenAITypeAdapter().format_input(model_input)

format_output_type(output_type=None)

Generate the structured output argument to pass to the client.

Parameters:

Name Type Description Default
output_type Optional[Any]

The structured output type provided.

None

Returns:

Type Description
dict

The structured output argument to pass to the model.

Source code in outlines/models/vllm.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the client.

    Parameters
    ----------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The structured output argument to pass to the model.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        return {"guided_grammar": term.definition}
    elif isinstance(term, JsonSchema):
        extra_body = {"guided_json": json.loads(term.schema)}
        if term.whitespace_pattern:
            extra_body["whitespace_pattern"] = term.whitespace_pattern
        return extra_body
    else:
        return {"guided_regex": to_regex(term)}

from_vllm(client, model_name=None)

Create an Outlines VLLM or AsyncVLLM model instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name Type Description Default
client Union[OpenAI, AsyncOpenAI]

An openai.OpenAI or openai.AsyncOpenAI instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Union[VLLM, AsyncVLLM]

An Outlines VLLM or AsyncVLLM model instance.

Source code in outlines/models/vllm.py
def from_vllm(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[VLLM, AsyncVLLM]:
    """Create an Outlines `VLLM` or `AsyncVLLM` model instance from an
    `openai.OpenAI` or `openai.AsyncOpenAI` instance.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[VLLM, AsyncVLLM]
        An Outlines `VLLM` or `AsyncVLLM` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    if isinstance(client, OpenAI):
        return VLLM(client, model_name)
    elif isinstance(client, AsyncOpenAI):
        return AsyncVLLM(client, model_name)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            "Please provide an OpenAI or AsyncOpenAI instance."
        )
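
A synchronous usage sketch, again with a placeholder server URL and model name, and assuming the top-level from_vllm re-export.

from openai import OpenAI

from outlines import from_vllm

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
model = from_vllm(client, "Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model name

# Plain text generation; `max_tokens` is forwarded to chat.completions.create.
print(model("Give me a one-sentence fun fact about the ocean.", max_tokens=60))

# Token-by-token streaming through the same client.
for chunk in model.generate_stream("Count from 1 to 5.", max_tokens=30):
    print(chunk, end="")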

vllm_offline

Integration with the vllm library (offline mode).

VLLMOffline

Bases: Model

Thin wrapper around a vllm.LLM model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the vllm.LLM model.

Source code in outlines/models/vllm_offline.py
class VLLMOffline(Model):
    """Thin wrapper around a `vllm.LLM` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `vllm.LLM` model.

    """

    def __init__(self, model: "LLM"):
        """Create a VLLM model instance.

        Parameters
        ----------
        model
            A `vllm.LLM` model instance.

        """
        self.model = model
        self.type_adapter = VLLMOfflineTypeAdapter()

    def _build_generation_args(
        self,
        inference_kwargs: dict,
        output_type: Optional[Any] = None,
    ) -> "SamplingParams":
        """Create the `SamplingParams` object to pass to the `generate` method
        of the `vllm.LLM` model."""
        from vllm.sampling_params import GuidedDecodingParams, SamplingParams

        sampling_params = inference_kwargs.pop("sampling_params", None)

        if sampling_params is None:
            sampling_params = SamplingParams()

        output_type_args = self.type_adapter.format_output_type(output_type)
        if output_type_args:
            sampling_params.guided_decoding = GuidedDecodingParams(**output_type_args)

        return sampling_params

    def generate(
        self,
        model_input: Chat | str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str]]:
        """Generate text using vLLM offline.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            in the `vllm.LLM` model.

        Returns
        -------
        Union[str, List[str]]
            The text generated by the model.

        """
        sampling_params = self._build_generation_args(
            inference_kwargs,
            output_type,
        )

        if isinstance(model_input, Chat):
            results = self.model.chat(
                messages=self.type_adapter.format_input(model_input),
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        else:
            results = self.model.generate(
                prompts=self.type_adapter.format_input(model_input),
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        results = [completion.text for completion in results[0].outputs]

        if len(results) == 1:
            return results[0]
        else:
            return results

    def generate_batch(
        self,
        model_input: List[Chat | str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[List[str], List[List[str]]]:
        """Generate a batch of completions using vLLM offline.

        Parameters
        ----------
        model_input
            The list of prompts based on which the model will generate a
            response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            in the `vllm.LLM` model.

        Returns
        -------
        Union[List[str], List[List[str]]]
            The text generated by the model.

        """
        sampling_params = self._build_generation_args(
            inference_kwargs,
            output_type,
        )

        if any(isinstance(item, Chat) for item in model_input):
            raise TypeError(
                "Batch generation is not available for the `Chat` input type."
            )

        results = self.model.generate(
            prompts=[self.type_adapter.format_input(item) for item in model_input],
            sampling_params=sampling_params,
            **inference_kwargs,
        )
        return [[sample.text for sample in batch.outputs] for batch in results]

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `vllm.LLM`.

        TODO: Implement the streaming functionality ourselves.

        """
        raise NotImplementedError(
            "Streaming is not available for the vLLM offline integration."
        )
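
A minimal offline sketch. It assumes vLLM is installed locally, uses a placeholder checkpoint name, and assumes from_vllm_offline is re-exported at the top level of the outlines package.

from pydantic import BaseModel
from vllm import LLM

from outlines import from_vllm_offline


class Answer(BaseModel):
    reasoning: str
    result: int


llm = LLM("Qwen/Qwen2.5-0.5B-Instruct")  # placeholder checkpoint
model = from_vllm_offline(llm)

# The Pydantic class ends up as `GuidedDecodingParams(json=...)` attached to the
# SamplingParams object built by `_build_generation_args`.
print(model("What is 6 times 7? Respond in JSON.", Answer))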

__init__(model)

Create a VLLM model instance.

Parameters:

Name Type Description Default
model LLM

A vllm.LLM model instance.

required
Source code in outlines/models/vllm_offline.py
def __init__(self, model: "LLM"):
    """Create a VLLM model instance.

    Parameters
    ----------
    model
        A `vllm.LLM` model instance.

    """
    self.model = model
    self.type_adapter = VLLMOfflineTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using vLLM offline.

Parameters:

Name Type Description Default
model_input Chat | str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The logits processor the model will use to constrain the format of the generated text.

None
inference_kwargs Any

Additional keyword arguments to pass to the generate method in the vllm.LLM model.

{}

Returns:

Type Description
Union[str, List[str]]

The text generated by the model.

Source code in outlines/models/vllm_offline.py
def generate(
    self,
    model_input: Chat | str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Generate text using vLLM offline.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.

    """
    sampling_params = self._build_generation_args(
        inference_kwargs,
        output_type,
    )

    if isinstance(model_input, Chat):
        results = self.model.chat(
            messages=self.type_adapter.format_input(model_input),
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    else:
        results = self.model.generate(
            prompts=self.type_adapter.format_input(model_input),
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    results = [completion.text for completion in results[0].outputs]

    if len(results) == 1:
        return results[0]
    else:
        return results
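
Because _build_generation_args pops any sampling_params keyword argument and only attaches the guided-decoding settings to it, callers can supply their own SamplingParams. A short sketch, again with a placeholder checkpoint name.

from vllm import LLM
from vllm.sampling_params import SamplingParams

from outlines import from_vllm_offline

model = from_vllm_offline(LLM("Qwen/Qwen2.5-0.5B-Instruct"))  # placeholder checkpoint

# Temperature and length come from the user; structured-output settings, if any,
# are added by the wrapper on top of this object.
print(model(
    "Write a haiku about the sea.",
    sampling_params=SamplingParams(temperature=0.3, max_tokens=60),
))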

generate_batch(model_input, output_type=None, **inference_kwargs)

Generate a batch of completions using vLLM offline.

Parameters:

Name Type Description Default
model_input List[Chat | str]

The list of prompts based on which the model will generate a response.

required
output_type Optional[Any]

The logits processor the model will use to constrain the format of the generated text.

None
inference_kwargs Any

Additional keyword arguments to pass to the generate method in the vllm.LLM model.

{}

Returns:

Type Description
Union[List[str], List[List[str]]]

The text generated by the model.

Source code in outlines/models/vllm_offline.py
def generate_batch(
    self,
    model_input: List[Chat | str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[List[str], List[List[str]]]:
    """Generate a batch of completions using vLLM offline.

    Parameters
    ----------
    model_input
        The list of prompts based on which the model will generate a
        response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[List[str], List[List[str]]]
        The text generated by the model.

    """
    sampling_params = self._build_generation_args(
        inference_kwargs,
        output_type,
    )

    if any(isinstance(item, Chat) for item in model_input):
        raise TypeError(
            "Batch generation is not available for the `Chat` input type."
        )

    results = self.model.generate(
        prompts=[self.type_adapter.format_input(item) for item in model_input],
        sampling_params=sampling_params,
        **inference_kwargs,
    )
    return [[sample.text for sample in batch.outputs] for batch in results]

generate_stream(model_input, output_type, **inference_kwargs)

Not available for vllm.LLM.

TODO: Implement the streaming functionality ourselves.

Source code in outlines/models/vllm_offline.py
def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Not available for `vllm.LLM`.

    TODO: Implement the streaming functionality ourselves.

    """
    raise NotImplementedError(
        "Streaming is not available for the vLLM offline integration."
    )

VLLMOfflineTypeAdapter

Bases: ModelTypeAdapter

Type adapter for the VLLMOffline model.

Source code in outlines/models/vllm_offline.py
class VLLMOfflineTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `VLLMOffline` model."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Argument
        --------
        model_input
            The input passed by the user.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "VLLM offline. The only available types are `str` and "
            "`Chat` (containing a prompt and images)."
        )

    @format_input.register(str)
    def format_input_str(self, model_input: str) -> str:
        """Format a `str` input.

        """
        return model_input

    @format_input.register(Chat)
    def format_input_chat(self, model_input: Chat) -> list:
        """Format a `Chat` input.

        """
        for message in model_input.messages:
            content = message["content"]
            if isinstance(content, list):
                raise ValueError(
                    "Assets are not supported for vLLM offline."
                    "Please only use text content in the `Chat` input."
                )
        return OpenAITypeAdapter().format_input(model_input)

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the model.

        For vLLM, the structured output definition is set in the
        `GuidedDecodingParams` constructor that is provided as a value to the
        `guided_decoding` parameter of the `SamplingParams` constructor, itself
        provided as a value to the `sampling_params` parameter of the `generate`
        method.

        Parameters
        ----------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The arguments to provide to the `GuidedDecodingParams` constructor.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            return {"grammar": term.definition}
        elif isinstance(term, JsonSchema):
            guided_decoding_params = {"json": json.loads(term.schema)}
            if term.whitespace_pattern:
                guided_decoding_params["whitespace_pattern"] = term.whitespace_pattern
            return guided_decoding_params
        else:
            return {"regex": to_regex(term)}

format_input(model_input)

Generate the prompt argument to pass to the model.

Argument

model_input The input passed by the user.

Source code in outlines/models/vllm_offline.py
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Argument
    --------
    model_input
        The input passed by the user.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "VLLM offline. The only available types are `str` and "
        "`Chat` (containing a prompt and images)."
    )

format_input_chat(model_input)

Format a Chat input.

Source code in outlines/models/vllm_offline.py
@format_input.register(Chat)
def format_input_chat(self, model_input: Chat) -> list:
    """Format a `Chat` input.

    """
    for message in model_input.messages:
        content = message["content"]
        if isinstance(content, list):
            raise ValueError(
                "Assets are not supported for vLLM offline."
                "Please only use text content in the `Chat` input."
            )
    return OpenAITypeAdapter().format_input(model_input)

format_input_str(model_input)

Format a str input.

Source code in outlines/models/vllm_offline.py
@format_input.register(str)
def format_input_str(self, model_input: str) -> str:
    """Format a `str` input.

    """
    return model_input

format_output_type(output_type=None)

Generate the structured output argument to pass to the model.

For vLLM, the structured output definition is set in the GuidedDecodingParams constructor that is provided as a value to the guided_decoding parameter of the SamplingParams constructor, itself provided as a value to the sampling_params parameter of the generate method.

Parameters:

Name Type Description Default
output_type Optional[Any]

The structured output type provided.

None

Returns:

Type Description
dict

The arguments to provide to the GuidedDecodingParams constructor.

Source code in outlines/models/vllm_offline.py
def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the model.

    For vLLM, the structured output definition is set in the
    `GuidedDecodingParams` constructor that is provided as a value to the
    `guided_decoding` parameter of the `SamplingParams` constructor, itself
    provided as a value to the `sampling_params` parameter of the `generate`
    method.

    Parameters
    ----------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The arguments to provide to the `GuidedDecodingParams` constructor.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        return {"grammar": term.definition}
    elif isinstance(term, JsonSchema):
        guided_decoding_params = {"json": json.loads(term.schema)}
        if term.whitespace_pattern:
            guided_decoding_params["whitespace_pattern"] = term.whitespace_pattern
        return guided_decoding_params
    else:
        return {"regex": to_regex(term)}

from_vllm_offline(model)

Create an Outlines VLLMOffline model instance from a vllm.LLM instance.

Parameters:

Name Type Description Default
model LLM

A vllm.LLM instance.

required

Returns:

Type Description
VLLMOffline

An Outlines VLLMOffline model instance.

Source code in outlines/models/vllm_offline.py
def from_vllm_offline(model: "LLM") -> VLLMOffline:
    """Create an Outlines `VLLMOffline` model instance from a `vllm.LLM`
    instance.

    Parameters
    ----------
    model
        A `vllm.LLM` instance.

    Returns
    -------
    VLLMOffline
        An Outlines `VLLMOffline` model instance.

    """
    return VLLMOffline(model)