models

Module that contains all the models integrated in outlines.

We group the models in submodules by provider instead of theme (completion, chat completion, diffusers, etc.) and use routing functions everywhere else in the codebase.

`Anthropic`

Bases: Model

Thin wrapper around the anthropic.Anthropic client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the anthropic.Anthropic client.

Source code in outlines/models/anthropic.py

class Anthropic(Model):
    """Thin wrapper around the `anthropic.Anthropic` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `anthropic.Anthropic` client.

    """
    def __init__(
        self, client: "AnthropicClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            An `anthropic.Anthropic` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = AnthropicTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            completion = self.client.messages.create(
                **messages,
                **inference_kwargs,
            )
        return completion.content[0].text

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Anthropic does not support batch generation."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = self.client.messages.create(
                **messages,
                stream=True,
                **inference_kwargs,
            )
            for chunk in stream:
                if (
                    chunk.type == "content_block_delta"
                    and chunk.delta.type == "text_delta"
                ):
                    yield chunk.delta.text

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Anthropic`	An `anthropic.Anthropic` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/anthropic.py

def __init__(
    self, client: "AnthropicClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client
        An `anthropic.Anthropic` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = AnthropicTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using Anthropic.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	As structured generation is not supported by Anthropic, the value of this argument must be `None`. Otherwise, an error will be raised at runtime.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The response generated by the model.

Source code in outlines/models/anthropic.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if (
        "model" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        completion = self.client.messages.create(
            **messages,
            **inference_kwargs,
        )
    return completion.content[0].text

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using Anthropic.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	As structured generation is not supported by Anthropic, the value of this argument must be `None`. Otherwise, an error will be raised at runtime.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/anthropic.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if (
        "model" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = self.client.messages.create(
            **messages,
            stream=True,
            **inference_kwargs,
        )
        for chunk in stream:
            if (
                chunk.type == "content_block_delta"
                and chunk.delta.type == "text_delta"
            ):
                yield chunk.delta.text

`AsyncDottxt`

Bases: AsyncModel

Async thin wrapper around the dottxt.client.AsyncDotTxt client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the dottxt.client.AsyncDotTxt client.

Source code in outlines/models/dottxt.py

class AsyncDottxt(AsyncModel):
    """Async thin wrapper around the `dottxt.client.AsyncDotTxt` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `dottxt.client.AsyncDotTxt`
    client.

    """

    def __init__(
        self,
        client: "AsyncDottxtClient",
        model: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            A `dottxt.AsyncDotTxt` client.
        model
            The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

        """
        self.client = client
        self.model = model
        self.type_adapter = DottxtTypeAdapter()

    async def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Dottxt asynchronously.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model as a JSON string.

        """
        prompt = self.type_adapter.format_input(model_input)
        json_schema = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model is not None:
            inference_kwargs["model"] = self.model

        if "model" not in inference_kwargs:
            raise ValueError(
                "A model identifier is required. Pass it to `from_dottxt_async()` "
                "or as a `model=` keyword argument at generation time."
            )

        with normalize_provider_errors(PROVIDER):
            result = await self.client.generate(
                input=prompt,
                response_format=json_schema,
                **inference_kwargs,
            )

        return json.dumps(result)

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Dottxt does not support batch generation."
        )

    async def generate_stream(  # type: ignore[override]
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Dottxt does not support streaming. Call the model/generator for "
            + "regular generation instead."
        )
        yield  # makes this an async generator so `async for` can consume it

`init(client, model=None)`

Parameters:

Name	Type	Description	Default
`client`	`AsyncDotTxt`	A `dottxt.AsyncDotTxt` client.	required
`model`	`Optional[str]`	The model identifier to use (e.g. `"dottxt/dottxt-v1-alpha"`).	`None`

Source code in outlines/models/dottxt.py

def __init__(
    self,
    client: "AsyncDottxtClient",
    model: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        A `dottxt.AsyncDotTxt` client.
    model
        The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

    """
    self.client = client
    self.model = model
    self.type_adapter = DottxtTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using Dottxt asynchronously.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model as a JSON string.

Source code in outlines/models/dottxt.py

async def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Dottxt asynchronously.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model as a JSON string.

    """
    prompt = self.type_adapter.format_input(model_input)
    json_schema = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model is not None:
        inference_kwargs["model"] = self.model

    if "model" not in inference_kwargs:
        raise ValueError(
            "A model identifier is required. Pass it to `from_dottxt_async()` "
            "or as a `model=` keyword argument at generation time."
        )

    with normalize_provider_errors(PROVIDER):
        result = await self.client.generate(
            input=prompt,
            response_format=json_schema,
            **inference_kwargs,
        )

    return json.dumps(result)

`AsyncLMStudio`

Bases: AsyncModel

Thin wrapper around a lmstudio.AsyncClient client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the LMStudio async client.

Source code in outlines/models/lmstudio.py

class AsyncLMStudio(AsyncModel):
    """Thin wrapper around a `lmstudio.AsyncClient` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the LMStudio async client.

    """

    def __init__(
        self, client: "AsyncClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            A LMStudio AsyncClient instance.
        model_name
            The name of the model to use. If not provided, uses the default
            loaded model in LMStudio.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = LMStudioTypeAdapter()
        self._context_entered = False

    async def close(self) -> None:
        """Close the async client and release resources."""
        if self._context_entered:
            await self.client.__aexit__(None, None, None)
            self._context_entered = False

    async def generate(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using LMStudio asynchronously.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        str
            The text generated by the model.

        """
        if not self._context_entered:
            await self.client.__aenter__()
            self._context_entered = True

        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        model_key = kwargs.pop("model", None)
        model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()

        formatted_input = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if response_format is not None:
            kwargs["response_format"] = response_format

        result = await model.respond(formatted_input, **kwargs)
        return result.content

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `lmstudio` library does not support batch inference."
        )

    async def generate_stream(  # type: ignore
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using LMStudio asynchronously.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        if not self._context_entered:
            await self.client.__aenter__()
            self._context_entered = True

        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        model_key = kwargs.pop("model", None)
        model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()

        formatted_input = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if response_format is not None:
            kwargs["response_format"] = response_format

        stream = await model.respond_stream(formatted_input, **kwargs)
        async for fragment in stream:
            yield fragment.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`AsyncClient`	A LMStudio AsyncClient instance.	required
`model_name`	`Optional[str]`	The name of the model to use. If not provided, uses the default loaded model in LMStudio.	`None`

Source code in outlines/models/lmstudio.py

def __init__(
    self, client: "AsyncClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client
        A LMStudio AsyncClient instance.
    model_name
        The name of the model to use. If not provided, uses the default
        loaded model in LMStudio.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = LMStudioTypeAdapter()
    self._context_entered = False

`close()` `async`

Close the async client and release resources.

Source code in outlines/models/lmstudio.py

async def close(self) -> None:
    """Close the async client and release resources."""
    if self._context_entered:
        await self.client.__aexit__(None, None, None)
        self._context_entered = False

`generate(model_input, output_type=None, **kwargs)` `async`

Generate text using LMStudio asynchronously.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/lmstudio.py

async def generate(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using LMStudio asynchronously.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    str
        The text generated by the model.

    """
    if not self._context_entered:
        await self.client.__aenter__()
        self._context_entered = True

    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if response_format is not None:
        kwargs["response_format"] = response_format

    result = await model.respond(formatted_input, **kwargs)
    return result.content

`generate_stream(model_input, output_type=None, **kwargs)` `async`

Stream text using LMStudio asynchronously.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`AsyncIterator[str]`	An async iterator that yields the text generated by the model.

Source code in outlines/models/lmstudio.py

async def generate_stream(  # type: ignore
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using LMStudio asynchronously.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    if not self._context_entered:
        await self.client.__aenter__()
        self._context_entered = True

    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if response_format is not None:
        kwargs["response_format"] = response_format

    stream = await model.respond_stream(formatted_input, **kwargs)
    async for fragment in stream:
        yield fragment.content

`AsyncMistral`

Bases: AsyncModel

Async thin wrapper around the mistralai.Mistral client.

Converts input and output types to arguments for the mistralai.Mistral client's async methods (chat.complete_async or chat.stream_async).

Source code in outlines/models/mistral.py

class AsyncMistral(AsyncModel):
    """Async thin wrapper around the `mistralai.Mistral` client.

    Converts input and output types to arguments for the `mistralai.Mistral`
    client's async methods (`chat.complete_async` or `chat.stream_async`).

    """

    def __init__(
        self, client: "MistralClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client : MistralClient
            A mistralai.Mistral client instance.
        model_name : Optional[str]
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = MistralTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate a response from the model asynchronously.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs : Any
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The response generated by the model as text.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            result = await self.client.chat.complete_async(
                messages=messages,
                response_format=response_format,
                stream=False,
                **inference_kwargs,
            )

        outputs = [choice.message for choice in result.choices]

        if len(outputs) == 1:
            return outputs[0].content
        else:
            return [m.content for m in outputs]

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The mistralai library does not support batch inference."
        )

    async def generate_stream(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        """Generate text from the model as an async stream of chunks.

        Parameters
        ----------
        model_input
            str, list, or chat input to generate from.
        output_type
            Optional type for structured output.
        **inference_kwargs
            Extra kwargs like "model" name.

        Yields
        ------
        str
            Chunks of text as they are streamed.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            response = await self.client.chat.stream_async(
                messages=messages,
                response_format=response_format,
                **inference_kwargs
            )
            async for chunk in response:
                if (
                    hasattr(chunk, "data")
                    and chunk.data.choices
                    and len(chunk.data.choices) > 0
                    and hasattr(chunk.data.choices[0], "delta")
                    and chunk.data.choices[0].delta.content is not None
                ):
                    yield chunk.data.choices[0].delta.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Mistral`	A mistralai.Mistral client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/mistral.py

def __init__(
    self, client: "MistralClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client : MistralClient
        A mistralai.Mistral client instance.
    model_name : Optional[str]
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = MistralTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate a response from the model asynchronously.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt or chat messages to generate a response from.	required
`output_type`	`Optional[Any]`	The desired format of the response (e.g., JSON schema).	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The response generated by the model as text.

Source code in outlines/models/mistral.py

async def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate a response from the model asynchronously.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs : Any
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The response generated by the model as text.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        result = await self.client.chat.complete_async(
            messages=messages,
            response_format=response_format,
            stream=False,
            **inference_kwargs,
        )

    outputs = [choice.message for choice in result.choices]

    if len(outputs) == 1:
        return outputs[0].content
    else:
        return [m.content for m in outputs]

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Generate text from the model as an async stream of chunks.

Parameters:

Name	Description	Default
`model_input`	str, list, or chat input to generate from.	required
`output_type`	Optional type for structured output.	`None`
`**inference_kwargs`	Extra kwargs like "model" name.	`{}`

Yields:

Type	Description
`str`	Chunks of text as they are streamed.

Source code in outlines/models/mistral.py

async def generate_stream(
    self,
    model_input,
    output_type=None,
    **inference_kwargs,
):
    """Generate text from the model as an async stream of chunks.

    Parameters
    ----------
    model_input
        str, list, or chat input to generate from.
    output_type
        Optional type for structured output.
    **inference_kwargs
        Extra kwargs like "model" name.

    Yields
    ------
    str
        Chunks of text as they are streamed.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        response = await self.client.chat.stream_async(
            messages=messages,
            response_format=response_format,
            **inference_kwargs
        )
        async for chunk in response:
            if (
                hasattr(chunk, "data")
                and chunk.data.choices
                and len(chunk.data.choices) > 0
                and hasattr(chunk.data.choices[0], "delta")
                and chunk.data.choices[0].delta.content is not None
            ):
                yield chunk.data.choices[0].delta.content

`AsyncOllama`

Bases: AsyncModel

Thin wrapper around the ollama.AsyncClient client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.AsyncClient client.

Source code in outlines/models/ollama.py

class AsyncOllama(AsyncModel):
    """Thin wrapper around the `ollama.AsyncClient` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `ollama.AsyncClient` client.

    """

    def __init__(
        self,client: "AsyncClient", model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `ollama.Client` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OllamaTypeAdapter()

    async def generate(self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            response = await self.client.chat(
                messages=self.type_adapter.format_input(model_input),
                format=self.type_adapter.format_output_type(output_type),
                **kwargs,
            )
        return response.message.content

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `ollama` library does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.chat(
                messages=self.type_adapter.format_input(model_input),
                format=self.type_adapter.format_output_type(output_type),
                stream=True,
                **kwargs,
            )
            async for chunk in stream:
                yield chunk.message.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`AsyncClient`	The `ollama.Client` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/ollama.py

def __init__(
    self,client: "AsyncClient", model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `ollama.Client` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OllamaTypeAdapter()

`generate(model_input, output_type=None, **kwargs)` `async`

Generate text using Ollama.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/ollama.py

async def generate(self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        response = await self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )
    return response.message.content

`generate_stream(model_input, output_type=None, **kwargs)` `async`

Stream text using Ollama.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py

async def generate_stream( # type: ignore
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            stream=True,
            **kwargs,
        )
        async for chunk in stream:
            yield chunk.message.content

`AsyncOpenAI`

Bases: AsyncModel

Thin wrapper around the openai.AsyncOpenAI client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.AsyncOpenAI client.

Source code in outlines/models/openai.py

class AsyncOpenAI(AsyncModel):
    """Thin wrapper around the `openai.AsyncOpenAI` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.AsyncOpenAI` client.

    """

    def __init__(
        self,
        client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"],
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OpenAITypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            result = await self.client.chat.completions.create(
                messages=messages,
                **response_format,
                **inference_kwargs,
            )

        messages = [choice.message for choice in result.choices]
        for message in messages:
            if message.refusal is not None:
                raise GenerationError(
                    f"OpenAI refused to answer the request: {message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The `openai` library does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs,
    ) -> AsyncIterator[str]:
        """Stream text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.chat.completions.create(
                stream=True,
                messages=messages,
                **response_format,
                **inference_kwargs
            )
            async for chunk in stream:
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Union[AsyncOpenAI, AsyncAzureOpenAI]`	The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/openai.py

def __init__(
    self,
    client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"],
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OpenAITypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Union[type[BaseModel], str]]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/openai.py

async def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        result = await self.client.chat.completions.create(
            messages=messages,
            **response_format,
            **inference_kwargs,
        )

    messages = [choice.message for choice in result.choices]
    for message in messages:
        if message.refusal is not None:
            raise GenerationError(
                f"OpenAI refused to answer the request: {message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Stream text using OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Union[type[BaseModel], str]]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/openai.py

async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs,
) -> AsyncIterator[str]:
    """Stream text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.chat.completions.create(
            stream=True,
            messages=messages,
            **response_format,
            **inference_kwargs
        )
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`AsyncSGLang`

Bases: AsyncModel

Thin async wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py

class AsyncSGLang(AsyncModel):
    """Thin async wrapper around the `openai.OpenAI` client used to communicate
    with an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.
        model_name
            The name of the model to use.

        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using `sglang`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = await self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise GenerationError(
                    f"The SGLang server refused to answer the request: "
                    f"{message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "SGLang does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Return a text generator.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.chat.completions.create(
                **client_args,
                stream=True,
            )
            async for chunk in stream:  # pragma: no cover
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }

        return client_args

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`		An `openai.AsyncOpenAI` client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Parameters:

Name	Type	Description	Default
`client`		An `openai.AsyncOpenAI` client instance.	required

Source code in outlines/models/sglang.py

def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.
    model_name
        The name of the model to use.

    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = SGLangTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using sglang.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/sglang.py

async def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using `sglang`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = await self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise GenerationError(
                f"The SGLang server refused to answer the request: "
                f"{message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Return a text generator.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`AsyncIterator[str]`	An async iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py

async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Return a text generator.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )
        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`AsyncTGI`

Bases: AsyncModel

Thin async wrapper around a huggingface_hub.AsyncInferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.AsyncInferenceClient client.

Source code in outlines/models/tgi.py

class AsyncTGI(AsyncModel):
    """Thin async wrapper around a `huggingface_hub.AsyncInferenceClient`
    client used to communicate with a `TGI` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the
    `huggingface_hub.AsyncInferenceClient` client.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `AsyncInferenceClient` client instance.

        """
        self.client = client
        self.type_adapter = TGITypeAdapter()

    async def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = await self.client.text_generation(**client_args)

        return response

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("TGI does not support batch inference.")

    async def generate_stream( # type: ignore
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.text_generation(
                **client_args, stream=True
            )
            async for chunk in stream:  # pragma: no cover
                yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the TGI client."""
        prompt = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        client_args = {
            "prompt": prompt,
            **inference_kwargs,
        }

        return client_args

`init(client)`

Parameters:

Name	Type	Description	Default
`client`		A huggingface `AsyncInferenceClient` client instance.	required

Source code in outlines/models/tgi.py

def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `AsyncInferenceClient` client instance.

    """
    self.client = client
    self.type_adapter = TGITypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using TGI.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/tgi.py

async def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = await self.client.text_generation(**client_args)

    return response

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Stream text using TGI.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`AsyncIterator[str]`	An async iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py

async def generate_stream( # type: ignore
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.text_generation(
            **client_args, stream=True
        )
        async for chunk in stream:  # pragma: no cover
            yield chunk

`AsyncVLLM`

Bases: AsyncModel

Thin async wrapper around the openai.OpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the vllm server.

Source code in outlines/models/vllm.py

class AsyncVLLM(AsyncModel):
    """Thin async wrapper around the `openai.OpenAI` client used to communicate
    with a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    `vllm` server.
    """

    def __init__(
        self,
        client: "AsyncOpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = await self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise GenerationError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("VLLM does not support batch inference.")

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.
        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.chat.completions.create(
                **client_args,
                stream=True,
            )
            async for chunk in stream:  # pragma: no cover
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`AsyncOpenAI`	An `openai.AsyncOpenAI` client instance.	required

Source code in outlines/models/vllm.py

def __init__(
    self,
    client: "AsyncOpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using vLLM.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/vllm.py

async def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = await self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise GenerationError(
                f"The vLLM server refused to answer the request: "
                f"{message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Stream text using vLLM.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`AsyncIterator[str]`	An async iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py

async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.
    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )
        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`Dottxt`

Bases: Model

Thin wrapper around the dottxt.client.DotTxt client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the dottxt.client.DotTxt client.

Source code in outlines/models/dottxt.py

class Dottxt(Model):
    """Thin wrapper around the `dottxt.client.DotTxt` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `dottxt.client.DotTxt` client.

    """

    def __init__(
        self,
        client: "DottxtClient",
        model: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            A `dottxt.DotTxt` client.
        model
            The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

        """
        self.client = client
        self.model = model
        self.type_adapter = DottxtTypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Dottxt.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model as a JSON string.

        """
        prompt = self.type_adapter.format_input(model_input)
        json_schema = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model is not None:
            inference_kwargs["model"] = self.model

        if "model" not in inference_kwargs:
            raise ValueError(
                "A model identifier is required. Pass it to `from_dottxt()` "
                "or as a `model=` keyword argument at generation time."
            )

        with normalize_provider_errors(PROVIDER):
            result = self.client.generate(
                input=prompt,
                response_format=json_schema,
                **inference_kwargs,
            )

        return json.dumps(result)


    def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Dottxt does not support batch generation."
        )

    def generate_stream(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Dottxt does not support streaming. Call the model/generator for "
            + "regular generation instead."
        )

`init(client, model=None)`

Parameters:

Name	Type	Description	Default
`client`	`DotTxt`	A `dottxt.DotTxt` client.	required
`model`	`Optional[str]`	The model identifier to use (e.g. `"dottxt/dottxt-v1-alpha"`).	`None`

Source code in outlines/models/dottxt.py

def __init__(
    self,
    client: "DottxtClient",
    model: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        A `dottxt.DotTxt` client.
    model
        The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

    """
    self.client = client
    self.model = model
    self.type_adapter = DottxtTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using Dottxt.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model as a JSON string.

Source code in outlines/models/dottxt.py

def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Dottxt.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model as a JSON string.

    """
    prompt = self.type_adapter.format_input(model_input)
    json_schema = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model is not None:
        inference_kwargs["model"] = self.model

    if "model" not in inference_kwargs:
        raise ValueError(
            "A model identifier is required. Pass it to `from_dottxt()` "
            "or as a `model=` keyword argument at generation time."
        )

    with normalize_provider_errors(PROVIDER):
        result = self.client.generate(
            input=prompt,
            response_format=json_schema,
            **inference_kwargs,
        )

    return json.dumps(result)

`Gemini`

Bases: Model

Thin wrapper around the google.genai.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the google.genai.Client client.

Source code in outlines/models/gemini.py

class Gemini(Model):
    """Thin wrapper around the `google.genai.Client` client.

    This wrapper is used to convert the input and output types specified by
    the users at a higher level to arguments to the `google.genai.Client`
    client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            A `google.genai.Client` instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = GeminiTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> str:
        """Generate a response from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        with normalize_provider_errors(PROVIDER):
            completion = self.client.models.generate_content(
                **contents,
                model=inference_kwargs.pop("model", self.model_name),
                config={**generation_config, **inference_kwargs}
            )

        return completion.text

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Gemini does not support batch generation."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Generate a stream of responses from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        with normalize_provider_errors(PROVIDER):
            stream = self.client.models.generate_content_stream(
                **contents,
                model=inference_kwargs.pop("model", self.model_name),
                config={**generation_config, **inference_kwargs},
            )
            for chunk in stream:
                if hasattr(chunk, "text") and chunk.text:
                    yield chunk.text

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Client`	A `google.genai.Client` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/gemini.py

def __init__(self, client: "Client", model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = GeminiTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate a response from the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type.	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The response generated by the model.

Source code in outlines/models/gemini.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> str:
    """Generate a response from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    generation_config = self.type_adapter.format_output_type(output_type)

    with normalize_provider_errors(PROVIDER):
        completion = self.client.models.generate_content(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs}
        )

    return completion.text

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Generate a stream of responses from the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type.	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/gemini.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Generate a stream of responses from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    generation_config = self.type_adapter.format_output_type(output_type)

    with normalize_provider_errors(PROVIDER):
        stream = self.client.models.generate_content_stream(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs},
        )
        for chunk in stream:
            if hasattr(chunk, "text") and chunk.text:
                yield chunk.text

`LMStudio`

Bases: Model

Thin wrapper around a lmstudio.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the LMStudio client.

Source code in outlines/models/lmstudio.py

class LMStudio(Model):
    """Thin wrapper around a `lmstudio.Client` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the LMStudio client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            A LMStudio Client instance obtained via `lmstudio.Client()` or
            `lmstudio.get_default_client()`.
        model_name
            The name of the model to use. If not provided, uses the default
            loaded model in LMStudio.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = LMStudioTypeAdapter()

    def generate(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using LMStudio.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        str
            The text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        model_key = kwargs.pop("model", None)
        model = self.client.llm.model(model_key) if model_key else self.client.llm.model()

        formatted_input = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if response_format is not None:
            kwargs["response_format"] = response_format

        result = model.respond(formatted_input, **kwargs)
        return result.content

    def generate_batch(
        self,
        model_input,
        output_type=None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `lmstudio` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using LMStudio.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        model_key = kwargs.pop("model", None)
        model = self.client.llm.model(model_key) if model_key else self.client.llm.model()

        formatted_input = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if response_format is not None:
            kwargs["response_format"] = response_format

        stream = model.respond_stream(formatted_input, **kwargs)
        for fragment in stream:
            yield fragment.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Client`	A LMStudio Client instance obtained via `lmstudio.Client()` or `lmstudio.get_default_client()`.	required
`model_name`	`Optional[str]`	The name of the model to use. If not provided, uses the default loaded model in LMStudio.	`None`

Source code in outlines/models/lmstudio.py

def __init__(self, client: "Client", model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        A LMStudio Client instance obtained via `lmstudio.Client()` or
        `lmstudio.get_default_client()`.
    model_name
        The name of the model to use. If not provided, uses the default
        loaded model in LMStudio.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = LMStudioTypeAdapter()

`generate(model_input, output_type=None, **kwargs)`

Generate text using LMStudio.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/lmstudio.py

def generate(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using LMStudio.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    str
        The text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    model = self.client.llm.model(model_key) if model_key else self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if response_format is not None:
        kwargs["response_format"] = response_format

    result = model.respond(formatted_input, **kwargs)
    return result.content

`generate_stream(model_input, output_type=None, **kwargs)`

Stream text using LMStudio.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/lmstudio.py

def generate_stream(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> Iterator[str]:
    """Stream text using LMStudio.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    model = self.client.llm.model(model_key) if model_key else self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if response_format is not None:
        kwargs["response_format"] = response_format

    stream = model.respond_stream(formatted_input, **kwargs)
    for fragment in stream:
        yield fragment.content

`LlamaCpp`

Bases: Model

Thin wrapper around the llama_cpp.Llama model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the llama_cpp.Llama model.

Source code in outlines/models/llamacpp.py

class LlamaCpp(Model):
    """Thin wrapper around the `llama_cpp.Llama` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `llama_cpp.Llama` model.
    """

    tensor_library_name = "numpy"

    def __init__(self, model: "Llama", chat_mode: bool = True):
        """
        Parameters
        ----------
        model
            A `llama_cpp.Llama` model instance.
        chat_mode
            Whether to enable chat mode. If `False`, the model will regard
            all `str` inputs as plain text prompts. If `True`, the model will
            regard all `str` inputs as user messages in a chat conversation.

        """
        self.model = model
        self.tokenizer = LlamaCppTokenizer(self.model)

        # Note: llama-cpp-python provides a default chat-template fallback even when
        # the user hasn't explicitly configured one:
        # https://github.com/abetlen/llama-cpp-python/blob/c37132b/llama_cpp/llama.py#L540-L545
        # We keep the default as True because the upstream library generally favors chat-style usage.
        self.type_adapter = LlamaCppTypeAdapter(has_chat_template=chat_mode)

    def generate(
        self,
        model_input: Union[Chat, str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        prompt = self.type_adapter.format_input(model_input)

        if isinstance(prompt, str):
            completion = self.model(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                **inference_kwargs,
            )
            result = completion["choices"][0]["text"]
        elif isinstance(prompt, list):
            completion = self.model.create_chat_completion(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                **inference_kwargs,
            )
            result = completion["choices"][0]["message"]["content"]
        else:  # Never reached  # pragma: no cover
            raise ValueError("Unexpected prompt type.")

        self.model.reset()

        return result

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("LlamaCpp does not support batch generation.")

    def generate_stream(
        self,
        model_input: Union[Chat, str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        prompt = self.type_adapter.format_input(model_input)

        if isinstance(prompt, str):
            generator = self.model(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                stream=True,
                **inference_kwargs,
            )
            for chunk in generator:
                yield chunk["choices"][0]["text"]

        elif isinstance(prompt, list):
            generator = self.model.create_chat_completion(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                stream=True,
                **inference_kwargs,
            )
            for chunk in generator:
                yield chunk["choices"][0]["delta"].get("content", "")
        else:  # Never reached  # pragma: no cover
            raise ValueError("Unexpected prompt type.")

`init(model, chat_mode=True)`

Parameters:

Name	Type	Description	Default
`model`	`Llama`	A `llama_cpp.Llama` model instance.	required
`chat_mode`	`bool`	Whether to enable chat mode. If `False`, the model will regard all `str` inputs as plain text prompts. If `True`, the model will regard all `str` inputs as user messages in a chat conversation.	`True`

Source code in outlines/models/llamacpp.py

def __init__(self, model: "Llama", chat_mode: bool = True):
    """
    Parameters
    ----------
    model
        A `llama_cpp.Llama` model instance.
    chat_mode
        Whether to enable chat mode. If `False`, the model will regard
        all `str` inputs as plain text prompts. If `True`, the model will
        regard all `str` inputs as user messages in a chat conversation.

    """
    self.model = model
    self.tokenizer = LlamaCppTokenizer(self.model)

    # Note: llama-cpp-python provides a default chat-template fallback even when
    # the user hasn't explicitly configured one:
    # https://github.com/abetlen/llama-cpp-python/blob/c37132b/llama_cpp/llama.py#L540-L545
    # We keep the default as True because the upstream library generally favors chat-style usage.
    self.type_adapter = LlamaCppTypeAdapter(has_chat_template=chat_mode)

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using llama-cpp-python.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the `Llama.__call__` method of the `llama-cpp-python` library.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/llamacpp.py

def generate(
    self,
    model_input: Union[Chat, str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    prompt = self.type_adapter.format_input(model_input)

    if isinstance(prompt, str):
        completion = self.model(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        result = completion["choices"][0]["text"]
    elif isinstance(prompt, list):
        completion = self.model.create_chat_completion(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        result = completion["choices"][0]["message"]["content"]
    else:  # Never reached  # pragma: no cover
        raise ValueError("Unexpected prompt type.")

    self.model.reset()

    return result

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using llama-cpp-python.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the `Llama.__call__` method of the `llama-cpp-python` library.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/llamacpp.py

def generate_stream(
    self,
    model_input: Union[Chat, str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    prompt = self.type_adapter.format_input(model_input)

    if isinstance(prompt, str):
        generator = self.model(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            stream=True,
            **inference_kwargs,
        )
        for chunk in generator:
            yield chunk["choices"][0]["text"]

    elif isinstance(prompt, list):
        generator = self.model.create_chat_completion(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            stream=True,
            **inference_kwargs,
        )
        for chunk in generator:
            yield chunk["choices"][0]["delta"].get("content", "")
    else:  # Never reached  # pragma: no cover
        raise ValueError("Unexpected prompt type.")

`MLXLM`

Bases: Model

Thin wrapper around an mlx_lm model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the mlx_lm library.

Source code in outlines/models/mlxlm.py

class MLXLM(Model):
    """Thin wrapper around an `mlx_lm` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `mlx_lm` library.

    """

    tensor_library_name = "mlx"

    def __init__(
        self,
        model: "nn.Module",
        tokenizer: "MLXTokenizer",
    ):
        """
        Parameters
        ----------
        model
            An instance of an `mlx_lm` model.
        tokenizer
            An instance of an `mlx_lm` tokenizer or of a compatible
            `transformers` tokenizer.

        """
        self.model = model
        # self.mlx_tokenizer is used by the mlx-lm in its generate function
        self.mlx_tokenizer = tokenizer
        # self.tokenizer is used by the logits processor
        # tokenizer may be a mlx_lm.TokenizerWrapper (whose ._tokenizer is a
        # PreTrainedTokenizerFast) or a PreTrainedTokenizerFast passed directly
        inner = getattr(tokenizer, "_tokenizer", tokenizer)
        hf_tokenizer = inner if isinstance(inner, PreTrainedTokenizerBase) else tokenizer
        self.tokenizer = TransformerTokenizer(hf_tokenizer)
        self.type_adapter = MLXLMTypeAdapter(
            tokenizer=tokenizer,
            has_chat_template=_check_hf_chat_template(tokenizer)
        )

    def generate(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> str:
        """Generate text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        from mlx_lm import generate

        return generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )

    def generate_batch(
        self,
        model_input: list[str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> list[str]:
        """Generate a batch of text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The list of prompts based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        list[str]
            The list of text generated by the model.

        """
        from mlx_lm import batch_generate

        if output_type:
            raise NotImplementedError(
                "mlx-lm does not support constrained generation with batching."
                + "You cannot provide an `output_type` with this method."
            )

        model_input = [self.type_adapter.format_input(item) for item in model_input]

        # Contrarily to the other generate methods, batch_generate requires
        # tokenized prompts
        add_special_tokens = [
            (
                self.mlx_tokenizer.bos_token is None
                or not prompt.startswith(self.mlx_tokenizer.bos_token)
            )
            for prompt in model_input
        ]
        tokenized_model_input = [
            self.mlx_tokenizer.encode(
                model_input[i], add_special_tokens=add_special_tokens[i]
            )
            for i in range(len(model_input))
        ]

        response = batch_generate(
            self.model,
            self.mlx_tokenizer,
            tokenized_model_input,
            **kwargs,
        )

        return response.texts

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> Iterator[str]:
        """Stream text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        from mlx_lm import stream_generate

        for gen_response in stream_generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        ):
            yield gen_response.text

`init(model, tokenizer)`

Parameters:

Name	Type	Description	Default
`model`	`Module`	An instance of an `mlx_lm` model.	required
`tokenizer`	`MLXTokenizer`	An instance of an `mlx_lm` tokenizer or of a compatible `transformers` tokenizer.	required

Source code in outlines/models/mlxlm.py

def __init__(
    self,
    model: "nn.Module",
    tokenizer: "MLXTokenizer",
):
    """
    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        `transformers` tokenizer.

    """
    self.model = model
    # self.mlx_tokenizer is used by the mlx-lm in its generate function
    self.mlx_tokenizer = tokenizer
    # self.tokenizer is used by the logits processor
    # tokenizer may be a mlx_lm.TokenizerWrapper (whose ._tokenizer is a
    # PreTrainedTokenizerFast) or a PreTrainedTokenizerFast passed directly
    inner = getattr(tokenizer, "_tokenizer", tokenizer)
    hf_tokenizer = inner if isinstance(inner, PreTrainedTokenizerBase) else tokenizer
    self.tokenizer = TransformerTokenizer(hf_tokenizer)
    self.type_adapter = MLXLMTypeAdapter(
        tokenizer=tokenizer,
        has_chat_template=_check_hf_chat_template(tokenizer)
    )

`generate(model_input, output_type=None, **kwargs)`

Generate text using mlx-lm.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`kwargs`		Additional keyword arguments to pass to the `mlx-lm` library.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/mlxlm.py

def generate(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> str:
    """Generate text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    from mlx_lm import generate

    return generate(
        self.model,
        self.mlx_tokenizer,
        self.type_adapter.format_input(model_input),
        logits_processors=self.type_adapter.format_output_type(output_type),
        **kwargs,
    )

`generate_batch(model_input, output_type=None, **kwargs)`

Generate a batch of text using mlx-lm.

Parameters:

Name	Type	Description	Default
`model_input`	`list[str]`	The list of prompts based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`kwargs`		Additional keyword arguments to pass to the `mlx-lm` library.	`{}`

Returns:

Type	Description
`list[str]`	The list of text generated by the model.

Source code in outlines/models/mlxlm.py

def generate_batch(
    self,
    model_input: list[str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> list[str]:
    """Generate a batch of text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The list of prompts based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    list[str]
        The list of text generated by the model.

    """
    from mlx_lm import batch_generate

    if output_type:
        raise NotImplementedError(
            "mlx-lm does not support constrained generation with batching."
            + "You cannot provide an `output_type` with this method."
        )

    model_input = [self.type_adapter.format_input(item) for item in model_input]

    # Contrarily to the other generate methods, batch_generate requires
    # tokenized prompts
    add_special_tokens = [
        (
            self.mlx_tokenizer.bos_token is None
            or not prompt.startswith(self.mlx_tokenizer.bos_token)
        )
        for prompt in model_input
    ]
    tokenized_model_input = [
        self.mlx_tokenizer.encode(
            model_input[i], add_special_tokens=add_special_tokens[i]
        )
        for i in range(len(model_input))
    ]

    response = batch_generate(
        self.model,
        self.mlx_tokenizer,
        tokenized_model_input,
        **kwargs,
    )

    return response.texts

`generate_stream(model_input, output_type=None, **kwargs)`

Stream text using mlx-lm.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`kwargs`		Additional keyword arguments to pass to the `mlx-lm` library.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/mlxlm.py

def generate_stream(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> Iterator[str]:
    """Stream text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    from mlx_lm import stream_generate

    for gen_response in stream_generate(
        self.model,
        self.mlx_tokenizer,
        self.type_adapter.format_input(model_input),
        logits_processors=self.type_adapter.format_output_type(output_type),
        **kwargs,
    ):
        yield gen_response.text

`Mistral`

Bases: Model

Thin wrapper around the mistralai.Mistral client.

Converts input and output types to arguments for the mistralai.Mistral client's chat.complete or chat.stream methods.

Source code in outlines/models/mistral.py

class Mistral(Model):
    """Thin wrapper around the `mistralai.Mistral` client.

    Converts input and output types to arguments for the `mistralai.Mistral`
    client's `chat.complete` or `chat.stream` methods.

    """

    def __init__(
        self, client: "MistralClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client : MistralClient
            A mistralai.Mistral client instance.
        model_name : Optional[str]
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = MistralTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate a response from the model.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs : Any
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The response generated by the model as text.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            result = self.client.chat.complete(
                messages=messages,
                response_format=response_format,
                **inference_kwargs,
            )

        outputs = [choice.message for choice in result.choices]

        if len(outputs) == 1:
            return outputs[0].content
        else:
            return [m.content for m in outputs]

    def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The `mistralai` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Generate a stream of responses from the model.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text chunks generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = self.client.chat.stream(
                messages=messages,
                response_format=response_format,
                **inference_kwargs
            )
            for chunk in stream:
                if (
                    hasattr(chunk, "data")
                    and chunk.data.choices
                    and chunk.data.choices[0].delta.content is not None
                ):
                    yield chunk.data.choices[0].delta.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Mistral`	A mistralai.Mistral client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/mistral.py

def __init__(
    self, client: "MistralClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client : MistralClient
        A mistralai.Mistral client instance.
    model_name : Optional[str]
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = MistralTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate a response from the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt or chat messages to generate a response from.	required
`output_type`	`Optional[Any]`	The desired format of the response (e.g., JSON schema).	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The response generated by the model as text.

Source code in outlines/models/mistral.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate a response from the model.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs : Any
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The response generated by the model as text.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        result = self.client.chat.complete(
            messages=messages,
            response_format=response_format,
            **inference_kwargs,
        )

    outputs = [choice.message for choice in result.choices]

    if len(outputs) == 1:
        return outputs[0].content
    else:
        return [m.content for m in outputs]

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Generate a stream of responses from the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt or chat messages to generate a response from.	required
`output_type`	`Optional[Any]`	The desired format of the response (e.g., JSON schema).	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text chunks generated by the model.

Source code in outlines/models/mistral.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Generate a stream of responses from the model.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text chunks generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = self.client.chat.stream(
            messages=messages,
            response_format=response_format,
            **inference_kwargs
        )
        for chunk in stream:
            if (
                hasattr(chunk, "data")
                and chunk.data.choices
                and chunk.data.choices[0].delta.content is not None
            ):
                yield chunk.data.choices[0].delta.content

`Model`

Bases: ABC

Base class for all synchronous models.

This class defines shared __call__, batch and stream methods that can be used to call the model directly. The generate, generate_batch, and generate_stream methods must be implemented by the subclasses. All models inheriting from this class must define a type_adapter attribute of type ModelTypeAdapter. The methods of the type_adapter attribute are used in the generate, generate_batch, and generate_stream methods to format the input and output types received by the model. Additionally, steerable models must define a tensor_library_name attribute.

Source code in outlines/models/base.py

class Model(ABC):
    """Base class for all synchronous models.

    This class defines shared `__call__`, `batch` and `stream` methods that can
    be used to call the model directly. The `generate`, `generate_batch`, and
    `generate_stream` methods must be implemented by the subclasses.
    All models inheriting from this class must define a `type_adapter`
    attribute of type `ModelTypeAdapter`. The methods of the `type_adapter`
    attribute are used in the `generate`, `generate_batch`, and
    `generate_stream` methods to format the input and output types received by
    the model.
    Additionally, steerable models must define a `tensor_library_name`
    attribute.

    """
    type_adapter: ModelTypeAdapter
    tensor_library_name: str

    def __call__(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Call the model.

        Users can call the model directly, in which case we will create a
        generator instance with the output type provided and call it.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        generator("prompt")
        ```
        and
        ```python
        model("prompt", Foo)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        backend
            The name of the backend to use to create the logits processor that
            will be used to generate the response. Only used for steerable
            models if `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        from outlines.generator import Generator

        return Generator(self, output_type, backend)(model_input, **inference_kwargs)

    def batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Make a batch call to the model (several inputs at once).

        Users can use the `batch` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `batch` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        generator.batch(["prompt1", "prompt2"])
        ```
        and
        ```python
        model.batch(["prompt1", "prompt2"], Foo)
        ```

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        backend
            The name of the backend to use to create the logits processor that
            will be used to generate the response. Only used for steerable
            models if `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type, backend)
        return generator.batch(model_input, **inference_kwargs) # type: ignore

    def stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Stream a response from the model.

        Users can use the `stream` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `stream` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        for chunk in generator("prompt"):
            print(chunk)
        ```
        and
        ```python
        for chunk in model.stream("prompt", Foo):
            print(chunk)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        backend
            The name of the backend to use to create the logits processor that
            will be used to generate the response. Only used for steerable
            models if `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type, backend)
        return generator.stream(model_input, **inference_kwargs) # type: ignore

    @abstractmethod
    def generate(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Generate a response from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        ...

    @abstractmethod
    def generate_batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Generate a batch of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        ...
    @abstractmethod
    def generate_stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Generate a stream of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        ...

`call(model_input, output_type=None, backend=None, **inference_kwargs)`

Call the model.

Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent:

generator = Generator(model, Foo)
generator("prompt")

and

model("prompt", Foo)

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`backend`	`Optional[str]`	The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Any`	The response generated by the model.

Source code in outlines/models/base.py

def __call__(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> Any:
    """Call the model.

    Users can call the model directly, in which case we will create a
    generator instance with the output type provided and call it.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    generator("prompt")
    ```
    and
    ```python
    model("prompt", Foo)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    backend
        The name of the backend to use to create the logits processor that
        will be used to generate the response. Only used for steerable
        models if `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    from outlines.generator import Generator

    return Generator(self, output_type, backend)(model_input, **inference_kwargs)

`batch(model_input, output_type=None, backend=None, **inference_kwargs)`

Make a batch call to the model (several inputs at once).

Users can use the batch method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its batch method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
generator.batch(["prompt1", "prompt2"])

and

model.batch(["prompt1", "prompt2"], Foo)

Parameters:

Name	Type	Description	Default
`model_input`	`List[Any]`	The list of inputs provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`backend`	`Optional[str]`	The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`List[Any]`	The list of responses generated by the model.

Source code in outlines/models/base.py

def batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Make a batch call to the model (several inputs at once).

    Users can use the `batch` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `batch` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    generator.batch(["prompt1", "prompt2"])
    ```
    and
    ```python
    model.batch(["prompt1", "prompt2"], Foo)
    ```

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    backend
        The name of the backend to use to create the logits processor that
        will be used to generate the response. Only used for steerable
        models if `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type, backend)
    return generator.batch(model_input, **inference_kwargs) # type: ignore

`generate(model_input, output_type=None, **inference_kwargs)` `abstractmethod`

Generate a response from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Any`	The response generated by the model.

Source code in outlines/models/base.py

@abstractmethod
def generate(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Generate a response from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    ...

`generate_batch(model_input, output_type=None, **inference_kwargs)` `abstractmethod`

Generate a batch of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name	Type	Description	Default
`model_input`	`List[Any]`	The list of inputs provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`List[Any]`	The list of responses generated by the model.

Source code in outlines/models/base.py

@abstractmethod
def generate_batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Generate a batch of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    ...

`generate_stream(model_input, output_type=None, **inference_kwargs)` `abstractmethod`

Generate a stream of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Iterator[Any]`	A stream of responses from the model.

Source code in outlines/models/base.py

@abstractmethod
def generate_stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Generate a stream of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    ...

`stream(model_input, output_type=None, backend=None, **inference_kwargs)`

Stream a response from the model.

Users can use the stream method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its stream method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
for chunk in generator("prompt"):
    print(chunk)

and

for chunk in model.stream("prompt", Foo):
    print(chunk)

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`backend`	`Optional[str]`	The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Iterator[Any]`	A stream of responses from the model.

Source code in outlines/models/base.py

def stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Stream a response from the model.

    Users can use the `stream` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `stream` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    for chunk in generator("prompt"):
        print(chunk)
    ```
    and
    ```python
    for chunk in model.stream("prompt", Foo):
        print(chunk)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    backend
        The name of the backend to use to create the logits processor that
        will be used to generate the response. Only used for steerable
        models if `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type, backend)
    return generator.stream(model_input, **inference_kwargs) # type: ignore

`ModelTypeAdapter`

Bases: ABC

Base class for all model type adapters.

A type adapter instance must be given as a value to the type_adapter attribute when instantiating a model. The type adapter is responsible for formatting the input and output types passed to the model to match the specific format expected by the associated model.

Source code in outlines/models/base.py

class ModelTypeAdapter(ABC):
    """Base class for all model type adapters.

    A type adapter instance must be given as a value to the `type_adapter`
    attribute when instantiating a model.
    The type adapter is responsible for formatting the input and output types
    passed to the model to match the specific format expected by the
    associated model.

    """

    @abstractmethod
    def format_input(self, model_input: Any) -> Any:
        """Format the user input to the expected format of the model.

        For API-based models, it typically means creating the `messages`
        argument passed to the client. For local models, it can mean casting
        the input from str to list for instance.
        This method is also used to validate that the input type provided by
        the user is supported by the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        Any
            The formatted input to be passed to the model.

        """
        ...

    @abstractmethod
    def format_output_type(self, output_type: Optional[Any] = None) -> Any:
        """Format the output type to the expected format of the model.

        For black-box models, this typically means creating a `response_format`
        argument. For steerable models, it means formatting the logits processor
        to create the object type expected by the model.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        Any
            The formatted output type to be passed to the model.

        """
        ...

`format_input(model_input)` `abstractmethod`

Format the user input to the expected format of the model.

For API-based models, it typically means creating the messages argument passed to the client. For local models, it can mean casting the input from str to list for instance. This method is also used to validate that the input type provided by the user is supported by the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required

Returns:

Type	Description
`Any`	The formatted input to be passed to the model.

Source code in outlines/models/base.py

@abstractmethod
def format_input(self, model_input: Any) -> Any:
    """Format the user input to the expected format of the model.

    For API-based models, it typically means creating the `messages`
    argument passed to the client. For local models, it can mean casting
    the input from str to list for instance.
    This method is also used to validate that the input type provided by
    the user is supported by the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    Any
        The formatted input to be passed to the model.

    """
    ...

`format_output_type(output_type=None)` `abstractmethod`

Format the output type to the expected format of the model.

For black-box models, this typically means creating a response_format argument. For steerable models, it means formatting the logits processor to create the object type expected by the model.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`

Returns:

Type	Description
`Any`	The formatted output type to be passed to the model.

Source code in outlines/models/base.py

@abstractmethod
def format_output_type(self, output_type: Optional[Any] = None) -> Any:
    """Format the output type to the expected format of the model.

    For black-box models, this typically means creating a `response_format`
    argument. For steerable models, it means formatting the logits processor
    to create the object type expected by the model.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    Any
        The formatted output type to be passed to the model.

    """
    ...

`Ollama`

Bases: Model

Thin wrapper around the ollama.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.Client client.

Source code in outlines/models/ollama.py

class Ollama(Model):
    """Thin wrapper around the `ollama.Client` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `ollama.Client` client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            The `ollama.Client` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OllamaTypeAdapter()

    def generate(self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        print(self.type_adapter.format_input(model_input))

        with normalize_provider_errors(PROVIDER):
            response = self.client.chat(
                messages=self.type_adapter.format_input(model_input),
                format=self.type_adapter.format_output_type(output_type),
                **kwargs,
            )

        return response.message.content

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `ollama` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            response = self.client.chat(
                messages=self.type_adapter.format_input(model_input),
                format=self.type_adapter.format_output_type(output_type),
                stream=True,
                **kwargs,
            )
            for chunk in response:
                yield chunk.message.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Client`	The `ollama.Client` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/ollama.py

def __init__(self, client: "Client", model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        The `ollama.Client` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OllamaTypeAdapter()

`generate(model_input, output_type=None, **kwargs)`

Generate text using Ollama.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/ollama.py

def generate(self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    print(self.type_adapter.format_input(model_input))

    with normalize_provider_errors(PROVIDER):
        response = self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )

    return response.message.content

`generate_stream(model_input, output_type=None, **kwargs)`

Stream text using Ollama.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py

def generate_stream(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> Iterator[str]:
    """Stream text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        response = self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            stream=True,
            **kwargs,
        )
        for chunk in response:
            yield chunk.message.content

`OpenAI`

Bases: Model

Thin wrapper around the openai.OpenAI client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client.

Source code in outlines/models/openai.py

class OpenAI(Model):
    """Thin wrapper around the `openai.OpenAI` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client.

    """

    def __init__(
        self,
        client: Union["OpenAIClient", "AzureOpenAIClient"],
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `openai.OpenAI` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OpenAITypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            result = self.client.chat.completions.create(
                messages=messages,
                **response_format,
                **inference_kwargs,
            )

        messages = [choice.message for choice in result.choices]
        for message in messages:
            if message.refusal is not None:
                raise GenerationError(
                    f"OpenAI refused to answer the request: {message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The `openai` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Stream text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = self.client.chat.completions.create(
                stream=True,
                messages=messages,
                **response_format,
                **inference_kwargs
            )
            for chunk in stream:
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Union[OpenAI, AzureOpenAI]`	The `openai.OpenAI` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/openai.py

def __init__(
    self,
    client: Union["OpenAIClient", "AzureOpenAIClient"],
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `openai.OpenAI` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OpenAITypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Union[type[BaseModel], str]]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/openai.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        result = self.client.chat.completions.create(
            messages=messages,
            **response_format,
            **inference_kwargs,
        )

    messages = [choice.message for choice in result.choices]
    for message in messages:
        if message.refusal is not None:
            raise GenerationError(
                f"OpenAI refused to answer the request: {message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Union[type[BaseModel], str]]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/openai.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Stream text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = self.client.chat.completions.create(
            stream=True,
            messages=messages,
            **response_format,
            **inference_kwargs
        )
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`SGLang`

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py

class SGLang(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using SGLang.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise GenerationError(
                    f"The SGLang server refused to answer the request: "
                    f"{message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "SGLang does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using SGLang.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = self.client.chat.completions.create(
                **client_args, stream=True,
            )
            for chunk in stream:  # pragma: no cover
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }

        return client_args

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`		An `openai.OpenAI` client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/sglang.py

def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = SGLangTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using SGLang.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/sglang.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using SGLang.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise GenerationError(
                f"The SGLang server refused to answer the request: "
                f"{message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using SGLang.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using SGLang.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = self.client.chat.completions.create(
            **client_args, stream=True,
        )
        for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`TGI`

Bases: Model

Thin wrapper around a huggingface_hub.InferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.InferenceClient client.

Source code in outlines/models/tgi.py

class TGI(Model):
    """Thin wrapper around a `huggingface_hub.InferenceClient` client used to
    communicate with a `TGI` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the
    `huggingface_hub.InferenceClient` client.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `InferenceClient` client instance.

        """
        self.client = client
        self.type_adapter = TGITypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            return self.client.text_generation(**client_args)

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("TGI does not support batch inference.")

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = self.client.text_generation(
                **client_args, stream=True,
            )
            for chunk in stream:  # pragma: no cover
                yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the TGI client."""
        prompt = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        client_args = {
            "prompt": prompt,
            **inference_kwargs,
        }

        return client_args

`init(client)`

Parameters:

Name	Type	Description	Default
`client`		A huggingface `InferenceClient` client instance.	required

Source code in outlines/models/tgi.py

def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `InferenceClient` client instance.

    """
    self.client = client
    self.type_adapter = TGITypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using TGI.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/tgi.py

def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        return self.client.text_generation(**client_args)

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using TGI.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py

def generate_stream(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = self.client.text_generation(
            **client_args, stream=True,
        )
        for chunk in stream:  # pragma: no cover
            yield chunk

`TransformerTokenizer`

Bases: Tokenizer

Represents a tokenizer for models in the transformers library.

Source code in outlines/models/transformers.py

class TransformerTokenizer(Tokenizer):
    """Represents a tokenizer for models in the `transformers` library."""

    def __init__(self, tokenizer: "PreTrainedTokenizer", **kwargs):
        self.tokenizer = tokenizer
        self.eos_token_id = self.tokenizer.eos_token_id
        self.eos_token = self.tokenizer.eos_token
        self.get_vocab = self.tokenizer.get_vocab

        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
            self.pad_token_id = self.eos_token_id
        else:
            self.pad_token_id = self.tokenizer.pad_token_id
            self.pad_token = self.tokenizer.pad_token

        self.special_tokens = set(self.tokenizer.all_special_tokens)

        self.vocabulary = self.tokenizer.get_vocab()
        self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types())

    def encode(
        self, prompt: Union[str, List[str]], **kwargs
    ) -> Tuple["torch.LongTensor", "torch.LongTensor"]:
        kwargs["padding"] = True
        kwargs["return_tensors"] = "pt"
        output = self.tokenizer(prompt, **kwargs)
        return output["input_ids"], output["attention_mask"]

    def decode(self, token_ids: "torch.LongTensor") -> List[str]:
        text = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        return text

    def convert_token_to_string(self, token: str) -> str:
        string = self.tokenizer.convert_tokens_to_string([token])

        if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
            return " " + string

        return string

    def __eq__(self, other):
        if isinstance(other, type(self)):
            if hasattr(self, "model_name") and hasattr(self, "kwargs"):
                return (
                    other.model_name == self.model_name and other.kwargs == self.kwargs
                )
            else:
                return other.tokenizer == self.tokenizer
        return NotImplemented

    def __hash__(self):
        from datasets.fingerprint import Hasher

        return hash(Hasher.hash(self.tokenizer))

    def __getstate__(self):
        state = {"tokenizer": self.tokenizer}
        return state

    def __setstate__(self, state):
        self.__init__(state["tokenizer"])

`Transformers`

Bases: Model

Thin wrapper around a transformers model and a transformers tokenizer.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and tokenizer.

Source code in outlines/models/transformers.py

class Transformers(Model):
    """Thin wrapper around a `transformers` model and a `transformers`
    tokenizer.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    tokenizer.

    """

    def __init__(
        self,
        model: "PreTrainedModel",
        tokenizer: "PreTrainedTokenizer",
        *,
        device_dtype: Optional["torch.dtype"] = None,
    ):
        """
        Parameters:
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        tokenizer
            A `PreTrainedTokenizer`, or any tokenizer that is compatible with
            the `transformers` API for tokenizers.
        device_dtype
            The dtype to use for the model. If not provided, the model will use
            the default dtype.

        """
        # We need to handle the cases in which jax/flax or tensorflow
        # is not available in the environment.
        try:
            from transformers import FlaxPreTrainedModel
        except ImportError:  # pragma: no cover
            FlaxPreTrainedModel = None

        try:
            from transformers import TFPreTrainedModel
        except ImportError:  # pragma: no cover
            TFPreTrainedModel = None

        tokenizer.padding_side = "left"
        self.model = model
        self.hf_tokenizer = tokenizer
        self.tokenizer = TransformerTokenizer(tokenizer)
        self.device_dtype = device_dtype
        self.type_adapter = TransformersTypeAdapter(
            tokenizer=tokenizer,
            has_chat_template=_check_hf_chat_template(tokenizer)
        )

        if (
            FlaxPreTrainedModel is not None
            and isinstance(model, FlaxPreTrainedModel)
        ):  # pragma: no cover
            self.tensor_library_name = "jax"
            warnings.warn("""
                Support for `jax` has been deprecated and will be removed in
                version 1.4.0 of Outlines. Please use `torch` instead.
                Transformers models using `jax` do not support structured
                generation.
                """,
                DeprecationWarning,
                stacklevel=2,
            )
        elif (
            TFPreTrainedModel is not None
            and isinstance(model, TFPreTrainedModel)
        ):  # pragma: no cover
            self.tensor_library_name = "tensorflow"
            warnings.warn("""
                Support for `tensorflow` has been deprecated and will be removed in
                version 1.4.0 of Outlines. Please use `torch` instead.
                Transformers models using `tensorflow` do not support structured
                generation.
                """,
                DeprecationWarning,
                stacklevel=2,
            )
        else:
            self.tensor_library_name = "torch"

    def _prepare_model_inputs(
        self,
        model_input,
        is_batch: bool = False,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        # Format validation
        if is_batch:
            prompts = [
                self.type_adapter.format_input(item)
                for item in model_input
            ]
        else:
            prompts = self.type_adapter.format_input(model_input)
        input_ids, attention_mask = self.tokenizer.encode(prompts)
        inputs = {
            "input_ids": input_ids.to(self.model.device),
            "attention_mask": (
                attention_mask.to(self.model.device, dtype=self.device_dtype)
                if self.device_dtype is not None
                else attention_mask.to(self.model.device)
            ),
        }

        return prompts, inputs

    def generate(
        self,
        model_input: Union[str, dict, Chat],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str]]:
        """Generate text using `transformers`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response. For
            multi-modal models, the input should be a dictionary containing the
            `text` key with a value of type `Union[str, List[str]]` and the
            other keys required by the model.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            of the `transformers` model.

        Returns
        -------
        Union[str, List[str]]
            The text generated by the model.

        """
        prompts, inputs = self._prepare_model_inputs(model_input, False)
        logits_processor = self.type_adapter.format_output_type(output_type)

        generated_ids = self._generate_output_seq(
            prompts,
            inputs,
            logits_processor=logits_processor,
            **inference_kwargs,
        )

        # required for multi-modal models that return a 2D tensor even when
        # num_return_sequences is 1
        num_samples = inference_kwargs.get("num_return_sequences", 1)
        if num_samples == 1 and len(generated_ids.shape) == 2:
            generated_ids = generated_ids.squeeze(0)

        return self._decode_generation(generated_ids)

    def generate_batch(
        self,
        model_input: List[Union[str, dict, Chat]],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> List[Union[str, List[str]]]:
        """"""
        prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore
        logits_processor = self.type_adapter.format_output_type(output_type)

        generated_ids = self._generate_output_seq(
            prompts, inputs, logits_processor=logits_processor, **inference_kwargs
        )

        # if there are multiple samples per input, convert generated_id to 3D
        num_samples = inference_kwargs.get("num_return_sequences", 1)
        if num_samples > 1:
            generated_ids = generated_ids.view(len(model_input), num_samples, -1)

        return self._decode_generation(generated_ids)

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `transformers` models.

        TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

        """
        raise NotImplementedError(
            "Streaming is not implemented for Transformers models."
        )

    def _generate_output_seq(self, prompts, inputs, **inference_kwargs):
        input_ids = inputs["input_ids"]

        output_ids = self.model.generate(
            **inputs,
            **inference_kwargs,
        )

        # encoder-decoder returns output_ids only, decoder-only returns full seq ids
        if self.model.config.is_encoder_decoder:
            generated_ids = output_ids
        else:
            generated_ids = output_ids[:, input_ids.shape[1] :]

        return generated_ids

    def _decode_generation(self, generated_ids: "torch.Tensor"):
        if len(generated_ids.shape) == 1:
            return self.tokenizer.decode([generated_ids])[0]
        elif len(generated_ids.shape) == 2:
            return self.tokenizer.decode(generated_ids)
        elif len(generated_ids.shape) == 3:
            return [
                self.tokenizer.decode(generated_ids[i])
                for i in range(len(generated_ids))
            ]
        else:  # pragma: no cover
            raise TypeError(
                "Generated outputs aren't 1D, 2D or 3D, but instead are "
                f"{generated_ids.shape}"
            )

`init(model, tokenizer, *, device_dtype=None)`

Parameters:

model A PreTrainedModel, or any model that is compatible with the transformers API for models. tokenizer A PreTrainedTokenizer, or any tokenizer that is compatible with the transformers API for tokenizers. device_dtype The dtype to use for the model. If not provided, the model will use the default dtype.

Source code in outlines/models/transformers.py

def __init__(
    self,
    model: "PreTrainedModel",
    tokenizer: "PreTrainedTokenizer",
    *,
    device_dtype: Optional["torch.dtype"] = None,
):
    """
    Parameters:
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    tokenizer
        A `PreTrainedTokenizer`, or any tokenizer that is compatible with
        the `transformers` API for tokenizers.
    device_dtype
        The dtype to use for the model. If not provided, the model will use
        the default dtype.

    """
    # We need to handle the cases in which jax/flax or tensorflow
    # is not available in the environment.
    try:
        from transformers import FlaxPreTrainedModel
    except ImportError:  # pragma: no cover
        FlaxPreTrainedModel = None

    try:
        from transformers import TFPreTrainedModel
    except ImportError:  # pragma: no cover
        TFPreTrainedModel = None

    tokenizer.padding_side = "left"
    self.model = model
    self.hf_tokenizer = tokenizer
    self.tokenizer = TransformerTokenizer(tokenizer)
    self.device_dtype = device_dtype
    self.type_adapter = TransformersTypeAdapter(
        tokenizer=tokenizer,
        has_chat_template=_check_hf_chat_template(tokenizer)
    )

    if (
        FlaxPreTrainedModel is not None
        and isinstance(model, FlaxPreTrainedModel)
    ):  # pragma: no cover
        self.tensor_library_name = "jax"
        warnings.warn("""
            Support for `jax` has been deprecated and will be removed in
            version 1.4.0 of Outlines. Please use `torch` instead.
            Transformers models using `jax` do not support structured
            generation.
            """,
            DeprecationWarning,
            stacklevel=2,
        )
    elif (
        TFPreTrainedModel is not None
        and isinstance(model, TFPreTrainedModel)
    ):  # pragma: no cover
        self.tensor_library_name = "tensorflow"
        warnings.warn("""
            Support for `tensorflow` has been deprecated and will be removed in
            version 1.4.0 of Outlines. Please use `torch` instead.
            Transformers models using `tensorflow` do not support structured
            generation.
            """,
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        self.tensor_library_name = "torch"

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using transformers.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[str, dict, Chat]`	The prompt based on which the model will generate a response. For multi-modal models, the input should be a dictionary containing the `text` key with a value of type `Union[str, List[str]]` and the other keys required by the model.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the `generate` method of the `transformers` model.	`{}`

Returns:

Type	Description
`Union[str, List[str]]`	The text generated by the model.

Source code in outlines/models/transformers.py

def generate(
    self,
    model_input: Union[str, dict, Chat],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Generate text using `transformers`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response. For
        multi-modal models, the input should be a dictionary containing the
        `text` key with a value of type `Union[str, List[str]]` and the
        other keys required by the model.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        of the `transformers` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.

    """
    prompts, inputs = self._prepare_model_inputs(model_input, False)
    logits_processor = self.type_adapter.format_output_type(output_type)

    generated_ids = self._generate_output_seq(
        prompts,
        inputs,
        logits_processor=logits_processor,
        **inference_kwargs,
    )

    # required for multi-modal models that return a 2D tensor even when
    # num_return_sequences is 1
    num_samples = inference_kwargs.get("num_return_sequences", 1)
    if num_samples == 1 and len(generated_ids.shape) == 2:
        generated_ids = generated_ids.squeeze(0)

    return self._decode_generation(generated_ids)

`generate_batch(model_input, output_type=None, **inference_kwargs)`

Source code in outlines/models/transformers.py

def generate_batch(
    self,
    model_input: List[Union[str, dict, Chat]],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> List[Union[str, List[str]]]:
    """"""
    prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore
    logits_processor = self.type_adapter.format_output_type(output_type)

    generated_ids = self._generate_output_seq(
        prompts, inputs, logits_processor=logits_processor, **inference_kwargs
    )

    # if there are multiple samples per input, convert generated_id to 3D
    num_samples = inference_kwargs.get("num_return_sequences", 1)
    if num_samples > 1:
        generated_ids = generated_ids.view(len(model_input), num_samples, -1)

    return self._decode_generation(generated_ids)

`generate_stream(model_input, output_type, **inference_kwargs)`

Not available for transformers models.

TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

Source code in outlines/models/transformers.py

def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Not available for `transformers` models.

    TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

    """
    raise NotImplementedError(
        "Streaming is not implemented for Transformers models."
    )

`TransformersMultiModal`

Bases: Transformers

Thin wrapper around a transformers model and a transformers processor.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and processor.

Source code in outlines/models/transformers.py

class TransformersMultiModal(Transformers):
    """Thin wrapper around a `transformers` model and a `transformers`
    processor.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    processor.

    """

    def __init__(
        self,
        model: "PreTrainedModel",
        processor,
        *,
        device_dtype: Optional["torch.dtype"] = None,
    ):
        """Create a TransformersMultiModal model instance

        We rely on the `__init__` method of the `Transformers` class to handle
        most of the initialization and then add elements specific to multimodal
        models.

        Parameters
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        processor
            A `ProcessorMixin` instance.
        device_dtype
            The dtype to use for the model. If not provided, the model will use
            the default dtype.

        """
        self.processor = processor
        self.processor.padding_side = "left"
        self.processor.pad_token = "[PAD]"

        tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer

        super().__init__(model, tokenizer, device_dtype=device_dtype)

        self.type_adapter = TransformersMultiModalTypeAdapter(
            tokenizer=tokenizer
        )

    def _prepare_model_inputs(
        self,
        model_input,
        is_batch: bool = False,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        if is_batch:
            prompts = [
                self.type_adapter.format_input(item) for item in model_input
            ]
        else:
            prompts = self.type_adapter.format_input(model_input)

        # The expected format is a single dict
        if is_batch:
            merged_prompts = defaultdict(list)
            for d in prompts:
                for key, value in d.items():
                    if key == "text":
                        merged_prompts[key].append(value)
                    else:
                        merged_prompts[key].extend(value)
        else:
            merged_prompts = prompts # type: ignore

        inputs = self.processor(
            **merged_prompts, padding=True, return_tensors="pt"
        )
        if self.device_dtype is not None:
            inputs = inputs.to(self.model.device, dtype=self.device_dtype)
        else:
            inputs = inputs.to(self.model.device)

        return merged_prompts["text"], inputs

`init(model, processor, *, device_dtype=None)`

Create a TransformersMultiModal model instance

We rely on the __init__ method of the Transformers class to handle most of the initialization and then add elements specific to multimodal models.

Parameters:

Name	Type	Description	Default
`model`	`PreTrainedModel`	A `PreTrainedModel`, or any model that is compatible with the `transformers` API for models.	required
`processor`		A `ProcessorMixin` instance.	required
`device_dtype`	`Optional[dtype]`	The dtype to use for the model. If not provided, the model will use the default dtype.	`None`

Source code in outlines/models/transformers.py

def __init__(
    self,
    model: "PreTrainedModel",
    processor,
    *,
    device_dtype: Optional["torch.dtype"] = None,
):
    """Create a TransformersMultiModal model instance

    We rely on the `__init__` method of the `Transformers` class to handle
    most of the initialization and then add elements specific to multimodal
    models.

    Parameters
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    processor
        A `ProcessorMixin` instance.
    device_dtype
        The dtype to use for the model. If not provided, the model will use
        the default dtype.

    """
    self.processor = processor
    self.processor.padding_side = "left"
    self.processor.pad_token = "[PAD]"

    tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer

    super().__init__(model, tokenizer, device_dtype=device_dtype)

    self.type_adapter = TransformersMultiModalTypeAdapter(
        tokenizer=tokenizer
    )

`VLLM`

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the vllm server.

Source code in outlines/models/vllm.py

class VLLM(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    `vllm` server.
    """

    def __init__(
        self,
        client: "OpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise GenerationError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("VLLM does not support batch inference.")

    def generate_stream(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = self.client.chat.completions.create(
                **client_args, stream=True,
            )
            for chunk in stream:  # pragma: no cover
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`OpenAI`	An `openai.OpenAI` client instance.	required

Source code in outlines/models/vllm.py

def __init__(
    self,
    client: "OpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using vLLM.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/vllm.py

def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise GenerationError(
                f"The vLLM server refused to answer the request: "
                f"{message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using vLLM.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py

def generate_stream(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = self.client.chat.completions.create(
            **client_args, stream=True,
        )
        for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`VLLMOffline`

Bases: Model

Thin wrapper around a vllm.LLM model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the vllm.LLM model.

Source code in outlines/models/vllm_offline.py

class VLLMOffline(Model):
    """Thin wrapper around a `vllm.LLM` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `vllm.LLM` model.

    """

    def __init__(self, model: "LLM"):
        """Create a VLLM model instance.

        Parameters
        ----------
        model
            A `vllm.LLM` model instance.

        """
        self.model = model
        self.tokenizer = self.model.get_tokenizer()
        self.type_adapter = VLLMOfflineTypeAdapter(has_chat_template=self._check_chat_template())

    def _build_generation_args(
        self,
        inference_kwargs: dict,
        output_type: Optional[Any] = None,
    ) -> "SamplingParams":
        """Create the `SamplingParams` object to pass to the `generate` method
        of the `vllm.LLM` model."""
        from vllm.sampling_params import StructuredOutputsParams, SamplingParams

        sampling_params = inference_kwargs.pop("sampling_params", None)

        if sampling_params is None:
            sampling_params = SamplingParams()

        output_type_args = self.type_adapter.format_output_type(output_type)
        if output_type_args:
            original_sampling_params_dict = {f: getattr(sampling_params, f) for f in sampling_params.__struct_fields__}
            sampling_params_dict = {**original_sampling_params_dict, "structured_outputs": StructuredOutputsParams(**output_type_args)}
            sampling_params = SamplingParams(**sampling_params_dict)

        return sampling_params

    def generate(
        self,
        model_input: Chat | str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str]]:
        """Generate text using vLLM offline.

        Parameters
        ----------
        prompt
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            in the `vllm.LLM` model.

        Returns
        -------
        Union[str, List[str]]
            The text generated by the model.

        """
        sampling_params = self._build_generation_args(
            inference_kwargs,
            output_type,
        )

        model_input = self.type_adapter.format_input(model_input)

        if isinstance(model_input, list):
            results = self.model.chat(
                messages=model_input,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        else:
            results = self.model.generate(
                prompts=model_input,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        results = [completion.text for completion in results[0].outputs]

        if len(results) == 1:
            return results[0]
        else:
            return results

    def generate_batch(
        self,
        model_input: List[Chat | str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[List[str], List[List[str]]]:
        """Generate a batch of completions using vLLM offline.

        Parameters
        ----------
        prompt
            The list of prompts based on which the model will generate a
            response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            in the `vllm.LLM` model.

        Returns
        -------
        Union[List[str], List[List[str]]]
            The text generated by the model.

        """
        sampling_params = self._build_generation_args(
            inference_kwargs,
            output_type,
        )

        model_inputs = [self.type_adapter.format_input(item) for item in model_input]

        if model_inputs and isinstance(model_inputs[0], list):
            results = self.model.chat(
                messages=model_inputs,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        else:
            results = self.model.generate(
                prompts=model_inputs,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        return [[sample.text for sample in batch.outputs] for batch in results]

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `vllm.LLM`.

        TODO: Implement the streaming functionality ourselves.

        """
        raise NotImplementedError(
            "Streaming is not available for the vLLM offline integration."
        )

    def _check_chat_template(self) -> bool:
        """Check if the tokenizer has a chat template."""
        # 1. Try HuggingFace-style chat template check (get_chat_template).
        # Only return early on True; on False or any exception fall through to
        # step 2 so that vLLM-style tokenizers are still handled correctly.
        if hasattr(self.tokenizer, "chat_template") or hasattr(self.tokenizer, "apply_chat_template"):
            try:
                from outlines.models.tokenizer import _check_hf_chat_template
                if _check_hf_chat_template(self.tokenizer):
                    return True
            except Exception:
                pass

        # 2. Try vLLM-style apply_chat_template (works for old and new vLLM).
        if hasattr(self.tokenizer, "apply_chat_template"):
            try:
                self.tokenizer.apply_chat_template([{"role": "user", "content": "test"}])
                return True
            except Exception:
                pass

        # 3. Default: no chat template
        return False

`init(model)`

Create a VLLM model instance.

Parameters:

Name	Type	Description	Default
`model`	`LLM`	A `vllm.LLM` model instance.	required

Source code in outlines/models/vllm_offline.py

def __init__(self, model: "LLM"):
    """Create a VLLM model instance.

    Parameters
    ----------
    model
        A `vllm.LLM` model instance.

    """
    self.model = model
    self.tokenizer = self.model.get_tokenizer()
    self.type_adapter = VLLMOfflineTypeAdapter(has_chat_template=self._check_chat_template())

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using vLLM offline.

Parameters:

Name	Type	Description	Default
`prompt`		The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the `generate` method in the `vllm.LLM` model.	`{}`

Returns:

Type	Description
`Union[str, List[str]]`	The text generated by the model.

Source code in outlines/models/vllm_offline.py

def generate(
    self,
    model_input: Chat | str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Generate text using vLLM offline.

    Parameters
    ----------
    prompt
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.

    """
    sampling_params = self._build_generation_args(
        inference_kwargs,
        output_type,
    )

    model_input = self.type_adapter.format_input(model_input)

    if isinstance(model_input, list):
        results = self.model.chat(
            messages=model_input,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    else:
        results = self.model.generate(
            prompts=model_input,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    results = [completion.text for completion in results[0].outputs]

    if len(results) == 1:
        return results[0]
    else:
        return results

`generate_batch(model_input, output_type=None, **inference_kwargs)`

Generate a batch of completions using vLLM offline.

Parameters:

Name	Type	Description	Default
`prompt`		The list of prompts based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the `generate` method in the `vllm.LLM` model.	`{}`

Returns:

Type	Description
`Union[List[str], List[List[str]]]`	The text generated by the model.

Source code in outlines/models/vllm_offline.py

def generate_batch(
    self,
    model_input: List[Chat | str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[List[str], List[List[str]]]:
    """Generate a batch of completions using vLLM offline.

    Parameters
    ----------
    prompt
        The list of prompts based on which the model will generate a
        response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[List[str], List[List[str]]]
        The text generated by the model.

    """
    sampling_params = self._build_generation_args(
        inference_kwargs,
        output_type,
    )

    model_inputs = [self.type_adapter.format_input(item) for item in model_input]

    if model_inputs and isinstance(model_inputs[0], list):
        results = self.model.chat(
            messages=model_inputs,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    else:
        results = self.model.generate(
            prompts=model_inputs,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    return [[sample.text for sample in batch.outputs] for batch in results]

`generate_stream(model_input, output_type, **inference_kwargs)`

Not available for vllm.LLM.

TODO: Implement the streaming functionality ourselves.

Source code in outlines/models/vllm_offline.py

def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Not available for `vllm.LLM`.

    TODO: Implement the streaming functionality ourselves.

    """
    raise NotImplementedError(
        "Streaming is not available for the vLLM offline integration."
    )

`from_anthropic(client, model_name=None)`

Create an Outlines Anthropic model instance from an anthropic.Anthropic client instance.

Parameters:

Name	Type	Description	Default
`client`	`Anthropic`	An `anthropic.Anthropic` client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Anthropic`	An Outlines `Anthropic` model instance.

Source code in outlines/models/anthropic.py

def from_anthropic(
    client: "AnthropicClient", model_name: Optional[str] = None
) -> Anthropic:
    """Create an Outlines `Anthropic` model instance from an
    `anthropic.Anthropic` client instance.

    Parameters
    ----------
    client
        An `anthropic.Anthropic` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Anthropic
        An Outlines `Anthropic` model instance.

    """
    return Anthropic(client, model_name)

`from_dottxt(client, model=None)`

Create an Outlines Dottxt or AsyncDottxt model instance from a dottxt.DotTxt or dottxt.AsyncDotTxt client instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[DotTxt, AsyncDotTxt]`	A `dottxt.DotTxt` or `dottxt.AsyncDotTxt` client instance.	required
`model`	`Optional[str]`	The model identifier to use (e.g. `"dottxt/dottxt-v1-alpha"`).	`None`

Returns:

Type	Description
`Union[Dottxt, AsyncDottxt]`	An Outlines `Dottxt` or `AsyncDottxt` model instance.

Source code in outlines/models/dottxt.py

def from_dottxt(
    client: "Union[DottxtClient, AsyncDottxtClient]",
    model: Optional[str] = None,
) -> Union[Dottxt, AsyncDottxt]:
    """Create an Outlines `Dottxt` or `AsyncDottxt` model instance from a
    `dottxt.DotTxt` or `dottxt.AsyncDotTxt` client instance.

    Parameters
    ----------
    client
        A `dottxt.DotTxt` or `dottxt.AsyncDotTxt` client instance.
    model
        The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

    Returns
    -------
    Union[Dottxt, AsyncDottxt]
        An Outlines `Dottxt` or `AsyncDottxt` model instance.

    """
    from dottxt import AsyncDotTxt, DotTxt

    if isinstance(client, DotTxt):
        return Dottxt(client, model)
    elif isinstance(client, AsyncDotTxt):
        return AsyncDottxt(client, model)
    else:
        raise ValueError(
            "Invalid client type. The client must be an instance of "
            "`dottxt.DotTxt` or `dottxt.AsyncDotTxt`."
        )

`from_gemini(client, model_name=None)`

Create an Outlines Gemini model instance from a google.genai.Client instance.

Parameters:

Name	Type	Description	Default
`client`	`Client`	A `google.genai.Client` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Gemini`	An Outlines `Gemini` model instance.

Source code in outlines/models/gemini.py

def from_gemini(client: "Client", model_name: Optional[str] = None) -> Gemini:
    """Create an Outlines `Gemini` model instance from a
    `google.genai.Client` instance.

    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Gemini
        An Outlines `Gemini` model instance.

    """
    return Gemini(client, model_name)

`from_llamacpp(model, chat_mode=True)`

Create an Outlines LlamaCpp model instance from a llama_cpp.Llama instance.

Parameters:

Name	Type	Description	Default
`model`	`Llama`	A `llama_cpp.Llama` instance.	required
`chat_mode`	`bool`	Whether to enable chat mode. If `False`, the model will regard all `str` inputs as plain text prompts. If `True`, the model will regard all `str` inputs as user messages in a chat conversation.	`True`

Returns:

Type	Description
`LlamaCpp`	An Outlines `LlamaCpp` model instance.

Source code in outlines/models/llamacpp.py

def from_llamacpp(model: "Llama", chat_mode: bool = True) -> LlamaCpp:
    """Create an Outlines `LlamaCpp` model instance from a
    `llama_cpp.Llama` instance.

    Parameters
    ----------
    model
        A `llama_cpp.Llama` instance.
    chat_mode
        Whether to enable chat mode. If `False`, the model will regard
        all `str` inputs as plain text prompts. If `True`, the model will
        regard all `str` inputs as user messages in a chat conversation.

    Returns
    -------
    LlamaCpp
        An Outlines `LlamaCpp` model instance.

    """
    return LlamaCpp(model, chat_mode=chat_mode)

`from_lmstudio(client, model_name=None)`

Create an Outlines LMStudio model instance from a lmstudio.Client or lmstudio.AsyncClient instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[Client, AsyncClient]`	A `lmstudio.Client` or `lmstudio.AsyncClient` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Union[LMStudio, AsyncLMStudio]`	An Outlines `LMStudio` or `AsyncLMStudio` model instance.

Source code in outlines/models/lmstudio.py

def from_lmstudio(
    client: Union["Client", "AsyncClient"],
    model_name: Optional[str] = None,
) -> Union[LMStudio, AsyncLMStudio]:
    """Create an Outlines `LMStudio` model instance from a
    `lmstudio.Client` or `lmstudio.AsyncClient` instance.

    Parameters
    ----------
    client
        A `lmstudio.Client` or `lmstudio.AsyncClient` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[LMStudio, AsyncLMStudio]
        An Outlines `LMStudio` or `AsyncLMStudio` model instance.

    """
    from lmstudio import AsyncClient, Client

    if isinstance(client, Client):
        return LMStudio(client, model_name)
    elif isinstance(client, AsyncClient):
        return AsyncLMStudio(client, model_name)
    else:
        raise ValueError(
            "Invalid client type, the client must be an instance of "
            "`lmstudio.Client` or `lmstudio.AsyncClient`."
        )

`from_mistral(client, model_name=None, async_client=False)`

Create an Outlines Mistral model instance from a mistralai.Mistral client.

Parameters:

Name	Type	Description	Default
`client`	`Mistral`	A mistralai.Mistral client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`
`async_client`	`bool`	If True, return an AsyncMistral instance; otherwise, return a Mistral instance.	`False`

Returns:

Type	Description
`Union[Mistral, AsyncMistral]`	An Outlines Mistral or AsyncMistral model instance.

Source code in outlines/models/mistral.py

def from_mistral(
    client: "MistralClient",
    model_name: Optional[str] = None,
    async_client: bool = False,
) -> Union[Mistral, AsyncMistral]:
    """Create an Outlines Mistral model instance from a mistralai.Mistral
    client.

    Parameters
    ----------
    client : MistralClient
        A mistralai.Mistral client instance.
    model_name : Optional[str]
        The name of the model to use.
    async_client : bool
        If True, return an AsyncMistral instance;
        otherwise, return a Mistral instance.

    Returns
    -------
    Union[Mistral, AsyncMistral]
        An Outlines Mistral or AsyncMistral model instance.

    """
    from mistralai import Mistral as MistralClient

    if not isinstance(client, MistralClient):
        raise ValueError(
            "Invalid client type. The client must be an instance of "
            "`mistralai.Mistral`."
        )

    if async_client:
        return AsyncMistral(client, model_name)
    else:
        return Mistral(client, model_name)

`from_mlxlm(model, tokenizer)`

Create an Outlines MLXLM model instance from an mlx_lm model and a tokenizer.

Parameters:

Name	Type	Description	Default
`model`	`Module`	An instance of an `mlx_lm` model.	required
`tokenizer`	`MLXTokenizer`	An instance of an `mlx_lm` tokenizer or of a compatible transformers tokenizer.	required

Returns:

Type	Description
`MLXLM`	An Outlines `MLXLM` model instance.

Source code in outlines/models/mlxlm.py

def from_mlxlm(model: "nn.Module", tokenizer: "MLXTokenizer") -> MLXLM:
    """Create an Outlines `MLXLM` model instance from an `mlx_lm` model and a
    tokenizer.

    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        transformers tokenizer.

    Returns
    -------
    MLXLM
        An Outlines `MLXLM` model instance.

    """
    return MLXLM(model, tokenizer)

`from_ollama(client, model_name=None)`

Create an Outlines Ollama model instance from an ollama.Client or ollama.AsyncClient instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[Client, AsyncClient]`	A `ollama.Client` or `ollama.AsyncClient` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Union[Ollama, AsyncOllama]`	An Outlines `Ollama` or `AsyncOllama` model instance.

Source code in outlines/models/ollama.py

def from_ollama(
    client: Union["Client", "AsyncClient"], model_name: Optional[str] = None
) -> Union[Ollama, AsyncOllama]:
    """Create an Outlines `Ollama` model instance from an `ollama.Client`
    or `ollama.AsyncClient` instance.

    Parameters
    ----------
    client
        A `ollama.Client` or `ollama.AsyncClient` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[Ollama, AsyncOllama]
        An Outlines `Ollama` or `AsyncOllama` model instance.

    """
    from ollama import AsyncClient, Client

    if isinstance(client, Client):
        return Ollama(client, model_name)
    elif isinstance(client, AsyncClient):
        return AsyncOllama(client, model_name)
    else:
        raise ValueError(
            "Invalid client type, the client must be an instance of "
            "`ollama.Client` or `ollama.AsyncClient`."
        )

`from_openai(client, model_name=None)`

Create an Outlines OpenAI or AsyncOpenAI model instance from an openai.OpenAI or openai.AsyncOpenAI client.

Parameters:

Name	Type	Description	Default
`client`	`Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]`	An `openai.OpenAI`, `openai.AsyncOpenAI`, `openai.AzureOpenAI` or `openai.AsyncAzureOpenAI` client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`OpenAI`	An Outlines `OpenAI` or `AsyncOpenAI` model instance.

Source code in outlines/models/openai.py

def from_openai(
    client: Union[
        "OpenAIClient",
        "AsyncOpenAIClient",
        "AzureOpenAIClient",
        "AsyncAzureOpenAIClient",
    ],
    model_name: Optional[str] = None,
) -> Union[OpenAI, AsyncOpenAI]:
    """Create an Outlines `OpenAI` or `AsyncOpenAI` model instance from an
    `openai.OpenAI` or `openai.AsyncOpenAI` client.

    Parameters
    ----------
    client
        An `openai.OpenAI`, `openai.AsyncOpenAI`, `openai.AzureOpenAI` or
        `openai.AsyncAzureOpenAI` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    OpenAI
        An Outlines `OpenAI` or `AsyncOpenAI` model instance.

    """
    import openai

    if isinstance(client, openai.OpenAI):
        return OpenAI(client, model_name)
    elif isinstance(client, openai.AsyncOpenAI):
        return AsyncOpenAI(client, model_name)
    else:
        raise ValueError(
            "Invalid client type. The client must be an instance of "
            "+ `openai.OpenAI` or `openai.AsyncOpenAI`."
        )

`from_sglang(client, model_name=None)`

Create a SGLang or AsyncSGLang instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[OpenAI, AsyncOpenAI]`	An `openai.OpenAI` or `openai.AsyncOpenAI` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Union[SGLang, AsyncSGLang]`	An Outlines `SGLang` or `AsyncSGLang` model instance.

Source code in outlines/models/sglang.py

def from_sglang(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[SGLang, AsyncSGLang]:
    """Create a `SGLang` or `AsyncSGLang` instance from an `openai.OpenAI` or
    `openai.AsyncOpenAI` instance.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[SGLang, AsyncSGLang]
        An Outlines `SGLang` or `AsyncSGLang` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    if isinstance(client, OpenAI):
        return SGLang(client, model_name)
    elif isinstance(client, AsyncOpenAI):
        return AsyncSGLang(client, model_name)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            "Please provide an OpenAI or AsyncOpenAI instance."
        )

`from_tgi(client)`

Create an Outlines TGI or AsyncTGI model instance from an huggingface_hub.InferenceClient or huggingface_hub.AsyncInferenceClient instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[InferenceClient, AsyncInferenceClient]`	An `huggingface_hub.InferenceClient` or `huggingface_hub.AsyncInferenceClient` instance.	required

Returns:

Type	Description
`Union[TGI, AsyncTGI]`	An Outlines `TGI` or `AsyncTGI` model instance.

Source code in outlines/models/tgi.py

def from_tgi(
    client: Union["InferenceClient", "AsyncInferenceClient"],
) -> Union[TGI, AsyncTGI]:
    """Create an Outlines `TGI` or `AsyncTGI` model instance from an
    `huggingface_hub.InferenceClient` or `huggingface_hub.AsyncInferenceClient`
    instance.

    Parameters
    ----------
    client
        An `huggingface_hub.InferenceClient` or
        `huggingface_hub.AsyncInferenceClient` instance.

    Returns
    -------
    Union[TGI, AsyncTGI]
        An Outlines `TGI` or `AsyncTGI` model instance.

    """
    from huggingface_hub import AsyncInferenceClient, InferenceClient

    if isinstance(client, InferenceClient):
        return TGI(client)
    elif isinstance(client, AsyncInferenceClient):
        return AsyncTGI(client)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            + "Please provide an HuggingFace InferenceClient "
            + "or AsyncInferenceClient instance."
        )

`from_transformers(model, tokenizer_or_processor, *, device_dtype=None)`

Create an Outlines Transformers or TransformersMultiModal model instance from a PreTrainedModel instance and a PreTrainedTokenizer or ProcessorMixin instance.

outlines supports PreTrainedModelForCausalLM, PreTrainedMambaForCausalLM, PreTrainedModelForSeq2Seq and any model that implements the transformers model API.

Parameters:

Name	Type	Description	Default
`model`	`PreTrainedModel`	A `transformers.PreTrainedModel` instance.	required
`tokenizer_or_processor`	`Union[PreTrainedTokenizer, ProcessorMixin]`	A `transformers.PreTrainedTokenizer` or `transformers.ProcessorMixin` instance.	required
`device_dtype`	`Optional[dtype]`	The dtype to use for the model. If not provided, the model will use the default dtype.	`None`

Returns:

Type	Description
`Union[Transformers, TransformersMultiModal]`	An Outlines `Transformers` or `TransformersMultiModal` model instance.

Source code in outlines/models/transformers.py

def from_transformers(
    model: "PreTrainedModel",
    tokenizer_or_processor: Union["PreTrainedTokenizer", "ProcessorMixin"],
    *,
    device_dtype: Optional["torch.dtype"] = None,
) -> Union[Transformers, TransformersMultiModal]:
    """Create an Outlines `Transformers` or `TransformersMultiModal` model
    instance from a `PreTrainedModel` instance and a `PreTrainedTokenizer` or
    `ProcessorMixin` instance.

    `outlines` supports `PreTrainedModelForCausalLM`,
    `PreTrainedMambaForCausalLM`, `PreTrainedModelForSeq2Seq` and any model
    that implements the `transformers` model API.

    Parameters
    ----------
    model
        A `transformers.PreTrainedModel` instance.
    tokenizer_or_processor
        A `transformers.PreTrainedTokenizer` or
        `transformers.ProcessorMixin` instance.
    device_dtype
        The dtype to use for the model. If not provided, the model will use
        the default dtype.

    Returns
    -------
    Union[Transformers, TransformersMultiModal]
        An Outlines `Transformers` or `TransformersMultiModal` model instance.

    """
    from transformers import (
        PreTrainedTokenizer, PreTrainedTokenizerFast, ProcessorMixin)

    if isinstance(
        tokenizer_or_processor, (PreTrainedTokenizer, PreTrainedTokenizerFast)
    ):
        tokenizer = tokenizer_or_processor
        return Transformers(model, tokenizer, device_dtype=device_dtype)
    elif isinstance(tokenizer_or_processor, ProcessorMixin):
        processor = tokenizer_or_processor
        return TransformersMultiModal(model, processor, device_dtype=device_dtype)
    else:
        raise ValueError(
            "We couldn't determine whether the model passed to `from_transformers`"
            + " is a text-2-text or a multi-modal model. Please provide a "
            + "a transformers tokenizer or processor."
        )

`from_vllm(client, model_name=None)`

Create an Outlines VLLM or AsyncVLLM model instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[OpenAI, AsyncOpenAI]`	An `openai.OpenAI` or `openai.AsyncOpenAI` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Union[VLLM, AsyncVLLM]`	An Outlines `VLLM` or `AsyncVLLM` model instance.

Source code in outlines/models/vllm.py

def from_vllm(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[VLLM, AsyncVLLM]:
    """Create an Outlines `VLLM` or `AsyncVLLM` model instance from an
    `openai.OpenAI` or `openai.AsyncOpenAI` instance.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[VLLM, AsyncVLLM]
        An Outlines `VLLM` or `AsyncVLLM` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    if isinstance(client, OpenAI):
        return VLLM(client, model_name)
    elif isinstance(client, AsyncOpenAI):
        return AsyncVLLM(client, model_name)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            "Please provide an OpenAI or AsyncOpenAI instance."
        )

`from_vllm_offline(model)`

Create an Outlines VLLMOffline model instance from a vllm.LLM instance.

Parameters:

Name	Type	Description	Default
`model`	`LLM`	A `vllm.LLM` instance.	required

Returns:

Type	Description
`VLLMOffline`	An Outlines `VLLMOffline` model instance.

Source code in outlines/models/vllm_offline.py

def from_vllm_offline(model: "LLM") -> VLLMOffline:
    """Create an Outlines `VLLMOffline` model instance from a `vllm.LLM`
    instance.

    Parameters
    ----------
    model
        A `vllm.LLM` instance.

    Returns
    -------
    VLLMOffline
        An Outlines `VLLMOffline` model instance.

    """
    return VLLMOffline(model)

`anthropic`

Integration with Anthropic's API.

`Anthropic`

Bases: Model

Thin wrapper around the anthropic.Anthropic client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the anthropic.Anthropic client.

Source code in outlines/models/anthropic.py

class Anthropic(Model):
    """Thin wrapper around the `anthropic.Anthropic` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `anthropic.Anthropic` client.

    """
    def __init__(
        self, client: "AnthropicClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            An `anthropic.Anthropic` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = AnthropicTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            completion = self.client.messages.create(
                **messages,
                **inference_kwargs,
            )
        return completion.content[0].text

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Anthropic does not support batch generation."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = self.client.messages.create(
                **messages,
                stream=True,
                **inference_kwargs,
            )
            for chunk in stream:
                if (
                    chunk.type == "content_block_delta"
                    and chunk.delta.type == "text_delta"
                ):
                    yield chunk.delta.text

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Anthropic`	An `anthropic.Anthropic` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/anthropic.py

def __init__(
    self, client: "AnthropicClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client
        An `anthropic.Anthropic` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = AnthropicTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using Anthropic.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	As structured generation is not supported by Anthropic, the value of this argument must be `None`. Otherwise, an error will be raised at runtime.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The response generated by the model.

Source code in outlines/models/anthropic.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if (
        "model" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        completion = self.client.messages.create(
            **messages,
            **inference_kwargs,
        )
    return completion.content[0].text

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using Anthropic.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	As structured generation is not supported by Anthropic, the value of this argument must be `None`. Otherwise, an error will be raised at runtime.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/anthropic.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if (
        "model" not in inference_kwargs
        and self.model_name is not None
    ):
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = self.client.messages.create(
            **messages,
            stream=True,
            **inference_kwargs,
        )
        for chunk in stream:
            if (
                chunk.type == "content_block_delta"
                and chunk.delta.type == "text_delta"
            ):
                yield chunk.delta.text

`AnthropicTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the Anthropic model.

AnthropicTypeAdapter is responsible for preparing the arguments to Anthropic's messages.create method: the input (prompt and possibly image). Anthropic does not support defining the output type, so format_output_type is not implemented.

Source code in outlines/models/anthropic.py

class AnthropicTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Anthropic` model.

    `AnthropicTypeAdapter` is responsible for preparing the arguments to
    Anthropic's `messages.create` method: the input (prompt and possibly
    image).
    Anthropic does not support defining the output type, so
    `format_output_type` is not implemented.

    """

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the `messages` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        dict
            The `messages` argument to pass to the client.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "Anthropic. The only available types are `str`, `list` and `Chat` "
            "(containing a prompt and images)."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> dict:
        return {
            "messages": [self._create_message("user", model_input)]
        }

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> dict:
        return {
            "messages": [
                self._create_message("user", model_input)
            ]
        }

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> dict:
        """Generate the `messages` argument to pass to the client when the user
        passes a Chat instance.

        """
        return {
            "messages": [
                self._create_message(message["role"], message["content"])
                for message in model_input.messages
            ]
        }

    def _create_message(self, role: str, content: str | list) -> dict:
        """Create a message."""

        if isinstance(content, str):
            return {
                "role": role,
                "content": content,
            }

        elif isinstance(content, list):
            prompt = content[0]
            images = content[1:]

            if not all(isinstance(image, Image) for image in images):
                raise ValueError("All assets provided must be of type Image")

            image_content_messages = [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image.image_format,
                        "data": image.image_str,
                    },
                }
                for image in images
            ]

            return {
                "role": role,
                "content": [
                    *image_content_messages,
                    {"type": "text", "text": prompt},
                ],
            }

        else:
            raise ValueError(
                f"Invalid content type: {type(content)}. "
                "The content must be a string or a list containing a string "
                "and a list of images."
            )

    def format_output_type(self, output_type):
        """Not implemented for Anthropic."""
        if output_type is None:
            return {}
        else:
            raise NotImplementedError(
                f"The output type {output_type} is not available with "
                "Anthropic."
            )

`format_chat_model_input(model_input)`

Generate the messages argument to pass to the client when the user passes a Chat instance.

Source code in outlines/models/anthropic.py

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> dict:
    """Generate the `messages` argument to pass to the client when the user
    passes a Chat instance.

    """
    return {
        "messages": [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]
    }

`format_input(model_input)`

Generate the messages argument to pass to the client.

Parameters:

Name	Type	Description	Default
`model_input`		The input provided by the user.	required

Returns:

Type	Description
`dict`	The `messages` argument to pass to the client.

Source code in outlines/models/anthropic.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the `messages` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    dict
        The `messages` argument to pass to the client.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "Anthropic. The only available types are `str`, `list` and `Chat` "
        "(containing a prompt and images)."
    )

`format_output_type(output_type)`

Not implemented for Anthropic.

Source code in outlines/models/anthropic.py

def format_output_type(self, output_type):
    """Not implemented for Anthropic."""
    if output_type is None:
        return {}
    else:
        raise NotImplementedError(
            f"The output type {output_type} is not available with "
            "Anthropic."
        )

`from_anthropic(client, model_name=None)`

Create an Outlines Anthropic model instance from an anthropic.Anthropic client instance.

Parameters:

Name	Type	Description	Default
`client`	`Anthropic`	An `anthropic.Anthropic` client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Anthropic`	An Outlines `Anthropic` model instance.

Source code in outlines/models/anthropic.py

def from_anthropic(
    client: "AnthropicClient", model_name: Optional[str] = None
) -> Anthropic:
    """Create an Outlines `Anthropic` model instance from an
    `anthropic.Anthropic` client instance.

    Parameters
    ----------
    client
        An `anthropic.Anthropic` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Anthropic
        An Outlines `Anthropic` model instance.

    """
    return Anthropic(client, model_name)

`base`

Base classes for all models and model type adapters.

`AsyncModel`

Bases: ABC

Base class for all asynchronous models.

This class defines shared __call__, batch and stream methods that can be used to call the model directly. The generate, generate_batch, and generate_stream methods must be implemented by the subclasses. All models inheriting from this class must define a type_adapter attribute of type ModelTypeAdapter. The methods of the type_adapter attribute are used in the generate, generate_batch, and generate_stream methods to format the input and output types received by the model. Additionally, steerable models must define a tensor_library_name attribute.

Source code in outlines/models/base.py

class AsyncModel(ABC):
    """Base class for all asynchronous models.

    This class defines shared `__call__`, `batch` and `stream` methods that can
    be used to call the model directly. The `generate`, `generate_batch`, and
    `generate_stream` methods must be implemented by the subclasses.
    All models inheriting from this class must define a `type_adapter`
    attribute of type `ModelTypeAdapter`. The methods of the `type_adapter`
    attribute are used in the `generate`, `generate_batch`, and
    `generate_stream` methods to format the input and output types received by
    the model.
    Additionally, steerable models must define a `tensor_library_name`
    attribute.

    """
    type_adapter: ModelTypeAdapter
    tensor_library_name: str

    async def __call__(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Call the model.

        Users can call the model directly, in which case we will create a
        generator instance with the output type provided and call it.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        await generator("prompt")
        ```
        and
        ```python
        await model("prompt", Foo)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        backend
            The name of the backend to use to create the logits processor that
            will be used to generate the response. Only used for steerable
            models if `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type, backend)
        return await generator(model_input, **inference_kwargs)

    async def batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Make a batch call to the model (several inputs at once).

        Users can use the `batch` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `batch` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        await generator.batch(["prompt1", "prompt2"])
        ```
        and
        ```python
        await model.batch(["prompt1", "prompt2"], Foo)
        ```

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        backend
            The name of the backend to use to create the logits processor that
            will be used to generate the response. Only used for steerable
            models if `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type, backend)
        return await generator.batch(model_input, **inference_kwargs) # type: ignore

    async def stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> AsyncIterator[Any]:
        """Stream a response from the model.

        Users can use the `stream` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `stream` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        async for chunk in generator("prompt"):
            print(chunk)
        ```
        and
        ```python
        async for chunk in model.stream("prompt", Foo):
            print(chunk)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        backend
            The name of the backend to use to create the logits processor that
            will be used to generate the response. Only used for steerable
            models if `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        AsyncIterator[Any]
            A stream of responses from the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type, backend)

        async for chunk in generator.stream(model_input, **inference_kwargs):  # type: ignore
            yield chunk

    @abstractmethod
    async def generate(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Generate a response from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        ...

    @abstractmethod
    async def generate_batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Generate a batch of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        ...

    @abstractmethod
    async def generate_stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> AsyncIterator[Any]:
        """Generate a stream of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        AsyncIterator[Any]
            A coroutine that will produce an async iterator of responses from the model.

        """
        ...

`call(model_input, output_type=None, backend=None, **inference_kwargs)` `async`

Call the model.

Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent:

generator = Generator(model, Foo)
await generator("prompt")

and

await model("prompt", Foo)

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`backend`	`Optional[str]`	The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Any`	The response generated by the model.

Source code in outlines/models/base.py

async def __call__(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> Any:
    """Call the model.

    Users can call the model directly, in which case we will create a
    generator instance with the output type provided and call it.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    await generator("prompt")
    ```
    and
    ```python
    await model("prompt", Foo)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    backend
        The name of the backend to use to create the logits processor that
        will be used to generate the response. Only used for steerable
        models if `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type, backend)
    return await generator(model_input, **inference_kwargs)

`batch(model_input, output_type=None, backend=None, **inference_kwargs)` `async`

Make a batch call to the model (several inputs at once).

Users can use the batch method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its batch method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
await generator.batch(["prompt1", "prompt2"])

and

await model.batch(["prompt1", "prompt2"], Foo)

Parameters:

Name	Type	Description	Default
`model_input`	`List[Any]`	The list of inputs provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`backend`	`Optional[str]`	The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`List[Any]`	The list of responses generated by the model.

Source code in outlines/models/base.py

async def batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Make a batch call to the model (several inputs at once).

    Users can use the `batch` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `batch` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    await generator.batch(["prompt1", "prompt2"])
    ```
    and
    ```python
    await model.batch(["prompt1", "prompt2"], Foo)
    ```

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    backend
        The name of the backend to use to create the logits processor that
        will be used to generate the response. Only used for steerable
        models if `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type, backend)
    return await generator.batch(model_input, **inference_kwargs) # type: ignore

`generate(model_input, output_type=None, **inference_kwargs)` `abstractmethod` `async`

Generate a response from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Any`	The response generated by the model.

Source code in outlines/models/base.py

@abstractmethod
async def generate(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Generate a response from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    ...

`generate_batch(model_input, output_type=None, **inference_kwargs)` `abstractmethod` `async`

Generate a batch of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name	Type	Description	Default
`model_input`	`List[Any]`	The list of inputs provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`List[Any]`	The list of responses generated by the model.

Source code in outlines/models/base.py

@abstractmethod
async def generate_batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Generate a batch of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    ...

`generate_stream(model_input, output_type=None, **inference_kwargs)` `abstractmethod` `async`

Generate a stream of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`AsyncIterator[Any]`	A coroutine that will produce an async iterator of responses from the model.

Source code in outlines/models/base.py

@abstractmethod
async def generate_stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> AsyncIterator[Any]:
    """Generate a stream of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    AsyncIterator[Any]
        A coroutine that will produce an async iterator of responses from the model.

    """
    ...

`stream(model_input, output_type=None, backend=None, **inference_kwargs)` `async`

Stream a response from the model.

Users can use the stream method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its stream method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
async for chunk in generator("prompt"):
    print(chunk)

and

async for chunk in model.stream("prompt", Foo):
    print(chunk)

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`backend`	`Optional[str]`	The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`AsyncIterator[Any]`	A stream of responses from the model.

Source code in outlines/models/base.py

async def stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> AsyncIterator[Any]:
    """Stream a response from the model.

    Users can use the `stream` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `stream` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    async for chunk in generator("prompt"):
        print(chunk)
    ```
    and
    ```python
    async for chunk in model.stream("prompt", Foo):
        print(chunk)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    backend
        The name of the backend to use to create the logits processor that
        will be used to generate the response. Only used for steerable
        models if `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    AsyncIterator[Any]
        A stream of responses from the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type, backend)

    async for chunk in generator.stream(model_input, **inference_kwargs):  # type: ignore
        yield chunk

`Model`

Bases: ABC

Base class for all synchronous models.

This class defines shared __call__, batch and stream methods that can be used to call the model directly. The generate, generate_batch, and generate_stream methods must be implemented by the subclasses. All models inheriting from this class must define a type_adapter attribute of type ModelTypeAdapter. The methods of the type_adapter attribute are used in the generate, generate_batch, and generate_stream methods to format the input and output types received by the model. Additionally, steerable models must define a tensor_library_name attribute.

Source code in outlines/models/base.py

class Model(ABC):
    """Base class for all synchronous models.

    This class defines shared `__call__`, `batch` and `stream` methods that can
    be used to call the model directly. The `generate`, `generate_batch`, and
    `generate_stream` methods must be implemented by the subclasses.
    All models inheriting from this class must define a `type_adapter`
    attribute of type `ModelTypeAdapter`. The methods of the `type_adapter`
    attribute are used in the `generate`, `generate_batch`, and
    `generate_stream` methods to format the input and output types received by
    the model.
    Additionally, steerable models must define a `tensor_library_name`
    attribute.

    """
    type_adapter: ModelTypeAdapter
    tensor_library_name: str

    def __call__(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Call the model.

        Users can call the model directly, in which case we will create a
        generator instance with the output type provided and call it.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        generator("prompt")
        ```
        and
        ```python
        model("prompt", Foo)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        backend
            The name of the backend to use to create the logits processor that
            will be used to generate the response. Only used for steerable
            models if `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        from outlines.generator import Generator

        return Generator(self, output_type, backend)(model_input, **inference_kwargs)

    def batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Make a batch call to the model (several inputs at once).

        Users can use the `batch` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `batch` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        generator.batch(["prompt1", "prompt2"])
        ```
        and
        ```python
        model.batch(["prompt1", "prompt2"], Foo)
        ```

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        backend
            The name of the backend to use to create the logits processor that
            will be used to generate the response. Only used for steerable
            models if `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type, backend)
        return generator.batch(model_input, **inference_kwargs) # type: ignore

    def stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Stream a response from the model.

        Users can use the `stream` method from the model directly, in which
        case we will create a generator instance with the output type provided
        and then invoke its `stream` method.
        Thus, those commands are equivalent:
        ```python
        generator = Generator(model, Foo)
        for chunk in generator("prompt"):
            print(chunk)
        ```
        and
        ```python
        for chunk in model.stream("prompt", Foo):
            print(chunk)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        backend
            The name of the backend to use to create the logits processor that
            will be used to generate the response. Only used for steerable
            models if `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        from outlines import Generator

        generator = Generator(self, output_type, backend)
        return generator.stream(model_input, **inference_kwargs) # type: ignore

    @abstractmethod
    def generate(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Generate a response from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        ...

    @abstractmethod
    def generate_batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Generate a batch of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        ...
    @abstractmethod
    def generate_stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Generate a stream of responses from the model.

        The output_type argument contains a logits processor for steerable
        models while it contains a type (Json, Enum...) for black-box models.
        This method is not intended to be used directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        ...

`call(model_input, output_type=None, backend=None, **inference_kwargs)`

Call the model.

Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent:

generator = Generator(model, Foo)
generator("prompt")

and

model("prompt", Foo)

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`backend`	`Optional[str]`	The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Any`	The response generated by the model.

Source code in outlines/models/base.py

def __call__(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> Any:
    """Call the model.

    Users can call the model directly, in which case we will create a
    generator instance with the output type provided and call it.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    generator("prompt")
    ```
    and
    ```python
    model("prompt", Foo)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    backend
        The name of the backend to use to create the logits processor that
        will be used to generate the response. Only used for steerable
        models if `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    from outlines.generator import Generator

    return Generator(self, output_type, backend)(model_input, **inference_kwargs)

`batch(model_input, output_type=None, backend=None, **inference_kwargs)`

Make a batch call to the model (several inputs at once).

Users can use the batch method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its batch method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
generator.batch(["prompt1", "prompt2"])

and

model.batch(["prompt1", "prompt2"], Foo)

Parameters:

Name	Type	Description	Default
`model_input`	`List[Any]`	The list of inputs provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`backend`	`Optional[str]`	The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`List[Any]`	The list of responses generated by the model.

Source code in outlines/models/base.py

def batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Make a batch call to the model (several inputs at once).

    Users can use the `batch` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `batch` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    generator.batch(["prompt1", "prompt2"])
    ```
    and
    ```python
    model.batch(["prompt1", "prompt2"], Foo)
    ```

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    backend
        The name of the backend to use to create the logits processor that
        will be used to generate the response. Only used for steerable
        models if `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type, backend)
    return generator.batch(model_input, **inference_kwargs) # type: ignore

`generate(model_input, output_type=None, **inference_kwargs)` `abstractmethod`

Generate a response from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Any`	The response generated by the model.

Source code in outlines/models/base.py

@abstractmethod
def generate(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Generate a response from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    ...

`generate_batch(model_input, output_type=None, **inference_kwargs)` `abstractmethod`

Generate a batch of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name	Type	Description	Default
`model_input`	`List[Any]`	The list of inputs provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`List[Any]`	The list of responses generated by the model.

Source code in outlines/models/base.py

@abstractmethod
def generate_batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Generate a batch of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    ...

`generate_stream(model_input, output_type=None, **inference_kwargs)` `abstractmethod`

Generate a stream of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Iterator[Any]`	A stream of responses from the model.

Source code in outlines/models/base.py

@abstractmethod
def generate_stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Generate a stream of responses from the model.

    The output_type argument contains a logits processor for steerable
    models while it contains a type (Json, Enum...) for black-box models.
    This method is not intended to be used directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    ...

`stream(model_input, output_type=None, backend=None, **inference_kwargs)`

Stream a response from the model.

Users can use the stream method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its stream method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
for chunk in generator("prompt"):
    print(chunk)

and

for chunk in model.stream("prompt", Foo):
    print(chunk)

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`
`backend`	`Optional[str]`	The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Iterator[Any]`	A stream of responses from the model.

Source code in outlines/models/base.py

def stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Stream a response from the model.

    Users can use the `stream` method from the model directly, in which
    case we will create a generator instance with the output type provided
    and then invoke its `stream` method.
    Thus, those commands are equivalent:
    ```python
    generator = Generator(model, Foo)
    for chunk in generator("prompt"):
        print(chunk)
    ```
    and
    ```python
    for chunk in model.stream("prompt", Foo):
        print(chunk)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    backend
        The name of the backend to use to create the logits processor that
        will be used to generate the response. Only used for steerable
        models if `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    from outlines import Generator

    generator = Generator(self, output_type, backend)
    return generator.stream(model_input, **inference_kwargs) # type: ignore

`ModelTypeAdapter`

Bases: ABC

Base class for all model type adapters.

A type adapter instance must be given as a value to the type_adapter attribute when instantiating a model. The type adapter is responsible for formatting the input and output types passed to the model to match the specific format expected by the associated model.

Source code in outlines/models/base.py

class ModelTypeAdapter(ABC):
    """Base class for all model type adapters.

    A type adapter instance must be given as a value to the `type_adapter`
    attribute when instantiating a model.
    The type adapter is responsible for formatting the input and output types
    passed to the model to match the specific format expected by the
    associated model.

    """

    @abstractmethod
    def format_input(self, model_input: Any) -> Any:
        """Format the user input to the expected format of the model.

        For API-based models, it typically means creating the `messages`
        argument passed to the client. For local models, it can mean casting
        the input from str to list for instance.
        This method is also used to validate that the input type provided by
        the user is supported by the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        Any
            The formatted input to be passed to the model.

        """
        ...

    @abstractmethod
    def format_output_type(self, output_type: Optional[Any] = None) -> Any:
        """Format the output type to the expected format of the model.

        For black-box models, this typically means creating a `response_format`
        argument. For steerable models, it means formatting the logits processor
        to create the object type expected by the model.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        Any
            The formatted output type to be passed to the model.

        """
        ...

`format_input(model_input)` `abstractmethod`

Format the user input to the expected format of the model.

For API-based models, it typically means creating the messages argument passed to the client. For local models, it can mean casting the input from str to list for instance. This method is also used to validate that the input type provided by the user is supported by the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Any`	The input provided by the user.	required

Returns:

Type	Description
`Any`	The formatted input to be passed to the model.

Source code in outlines/models/base.py

@abstractmethod
def format_input(self, model_input: Any) -> Any:
    """Format the user input to the expected format of the model.

    For API-based models, it typically means creating the `messages`
    argument passed to the client. For local models, it can mean casting
    the input from str to list for instance.
    This method is also used to validate that the input type provided by
    the user is supported by the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    Any
        The formatted input to be passed to the model.

    """
    ...

`format_output_type(output_type=None)` `abstractmethod`

Format the output type to the expected format of the model.

For black-box models, this typically means creating a response_format argument. For steerable models, it means formatting the logits processor to create the object type expected by the model.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`

Returns:

Type	Description
`Any`	The formatted output type to be passed to the model.

Source code in outlines/models/base.py

@abstractmethod
def format_output_type(self, output_type: Optional[Any] = None) -> Any:
    """Format the output type to the expected format of the model.

    For black-box models, this typically means creating a `response_format`
    argument. For steerable models, it means formatting the logits processor
    to create the object type expected by the model.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    Any
        The formatted output type to be passed to the model.

    """
    ...

`dottxt`

Integration with Dottxt's API.

`AsyncDottxt`

Bases: AsyncModel

Async thin wrapper around the dottxt.client.AsyncDotTxt client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the dottxt.client.AsyncDotTxt client.

Source code in outlines/models/dottxt.py

class AsyncDottxt(AsyncModel):
    """Async thin wrapper around the `dottxt.client.AsyncDotTxt` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `dottxt.client.AsyncDotTxt`
    client.

    """

    def __init__(
        self,
        client: "AsyncDottxtClient",
        model: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            A `dottxt.AsyncDotTxt` client.
        model
            The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

        """
        self.client = client
        self.model = model
        self.type_adapter = DottxtTypeAdapter()

    async def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Dottxt asynchronously.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model as a JSON string.

        """
        prompt = self.type_adapter.format_input(model_input)
        json_schema = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model is not None:
            inference_kwargs["model"] = self.model

        if "model" not in inference_kwargs:
            raise ValueError(
                "A model identifier is required. Pass it to `from_dottxt_async()` "
                "or as a `model=` keyword argument at generation time."
            )

        with normalize_provider_errors(PROVIDER):
            result = await self.client.generate(
                input=prompt,
                response_format=json_schema,
                **inference_kwargs,
            )

        return json.dumps(result)

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Dottxt does not support batch generation."
        )

    async def generate_stream(  # type: ignore[override]
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Dottxt does not support streaming. Call the model/generator for "
            + "regular generation instead."
        )
        yield  # makes this an async generator so `async for` can consume it

`init(client, model=None)`

Parameters:

Name	Type	Description	Default
`client`	`AsyncDotTxt`	A `dottxt.AsyncDotTxt` client.	required
`model`	`Optional[str]`	The model identifier to use (e.g. `"dottxt/dottxt-v1-alpha"`).	`None`

Source code in outlines/models/dottxt.py

def __init__(
    self,
    client: "AsyncDottxtClient",
    model: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        A `dottxt.AsyncDotTxt` client.
    model
        The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

    """
    self.client = client
    self.model = model
    self.type_adapter = DottxtTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using Dottxt asynchronously.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model as a JSON string.

Source code in outlines/models/dottxt.py

async def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Dottxt asynchronously.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model as a JSON string.

    """
    prompt = self.type_adapter.format_input(model_input)
    json_schema = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model is not None:
        inference_kwargs["model"] = self.model

    if "model" not in inference_kwargs:
        raise ValueError(
            "A model identifier is required. Pass it to `from_dottxt_async()` "
            "or as a `model=` keyword argument at generation time."
        )

    with normalize_provider_errors(PROVIDER):
        result = await self.client.generate(
            input=prompt,
            response_format=json_schema,
            **inference_kwargs,
        )

    return json.dumps(result)

`Dottxt`

Bases: Model

Thin wrapper around the dottxt.client.DotTxt client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the dottxt.client.DotTxt client.

Source code in outlines/models/dottxt.py

class Dottxt(Model):
    """Thin wrapper around the `dottxt.client.DotTxt` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `dottxt.client.DotTxt` client.

    """

    def __init__(
        self,
        client: "DottxtClient",
        model: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            A `dottxt.DotTxt` client.
        model
            The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

        """
        self.client = client
        self.model = model
        self.type_adapter = DottxtTypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Dottxt.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model as a JSON string.

        """
        prompt = self.type_adapter.format_input(model_input)
        json_schema = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model is not None:
            inference_kwargs["model"] = self.model

        if "model" not in inference_kwargs:
            raise ValueError(
                "A model identifier is required. Pass it to `from_dottxt()` "
                "or as a `model=` keyword argument at generation time."
            )

        with normalize_provider_errors(PROVIDER):
            result = self.client.generate(
                input=prompt,
                response_format=json_schema,
                **inference_kwargs,
            )

        return json.dumps(result)


    def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Dottxt does not support batch generation."
        )

    def generate_stream(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Dottxt does not support streaming. Call the model/generator for "
            + "regular generation instead."
        )

`init(client, model=None)`

Parameters:

Name	Type	Description	Default
`client`	`DotTxt`	A `dottxt.DotTxt` client.	required
`model`	`Optional[str]`	The model identifier to use (e.g. `"dottxt/dottxt-v1-alpha"`).	`None`

Source code in outlines/models/dottxt.py

def __init__(
    self,
    client: "DottxtClient",
    model: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        A `dottxt.DotTxt` client.
    model
        The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

    """
    self.client = client
    self.model = model
    self.type_adapter = DottxtTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using Dottxt.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model as a JSON string.

Source code in outlines/models/dottxt.py

def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Dottxt.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model as a JSON string.

    """
    prompt = self.type_adapter.format_input(model_input)
    json_schema = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model is not None:
        inference_kwargs["model"] = self.model

    if "model" not in inference_kwargs:
        raise ValueError(
            "A model identifier is required. Pass it to `from_dottxt()` "
            "or as a `model=` keyword argument at generation time."
        )

    with normalize_provider_errors(PROVIDER):
        result = self.client.generate(
            input=prompt,
            response_format=json_schema,
            **inference_kwargs,
        )

    return json.dumps(result)

`DottxtTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the Dottxt model.

Source code in outlines/models/dottxt.py

class DottxtTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Dottxt` model."""

    def format_input(self, model_input: str) -> str:
        """Format the prompt to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The input to pass to the client.

        """
        if isinstance(model_input, str):
            return model_input
        raise TypeError(
            f"The input type {model_input} is not available with Dottxt. "
            "The only available type is `str`."
        )

    def format_output_type(self, output_type: Optional[Any] = None) -> str:
        """Format the output type to pass to the client.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        str
            The output type to pass to the client as a JSON schema string.

        """
        if output_type is None:
            raise TypeError(
                "You must provide an output type. Dottxt only supports "
                "constrained generation."
            )
        elif isinstance(output_type, Regex):
            raise TypeError(
                "Regex-based structured outputs will soon be available with "
                "Dottxt. Use an open source model in the meantime."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs will soon be available with "
                "Dottxt. Use an open source model in the meantime."
            )
        elif JsonSchema.is_json_schema(output_type):
            return cast(str, JsonSchema.convert_to(output_type, ["str"]))
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by Dottxt. "
                "Consider using a local mode instead."
            )

`format_input(model_input)`

Format the prompt to pass to the client.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The input provided by the user.	required

Returns:

Type	Description
`str`	The input to pass to the client.

Source code in outlines/models/dottxt.py

def format_input(self, model_input: str) -> str:
    """Format the prompt to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The input to pass to the client.

    """
    if isinstance(model_input, str):
        return model_input
    raise TypeError(
        f"The input type {model_input} is not available with Dottxt. "
        "The only available type is `str`."
    )

`format_output_type(output_type=None)`

Format the output type to pass to the client.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`

Returns:

Type	Description
`str`	The output type to pass to the client as a JSON schema string.

Source code in outlines/models/dottxt.py

def format_output_type(self, output_type: Optional[Any] = None) -> str:
    """Format the output type to pass to the client.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    str
        The output type to pass to the client as a JSON schema string.

    """
    if output_type is None:
        raise TypeError(
            "You must provide an output type. Dottxt only supports "
            "constrained generation."
        )
    elif isinstance(output_type, Regex):
        raise TypeError(
            "Regex-based structured outputs will soon be available with "
            "Dottxt. Use an open source model in the meantime."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs will soon be available with "
            "Dottxt. Use an open source model in the meantime."
        )
    elif JsonSchema.is_json_schema(output_type):
        return cast(str, JsonSchema.convert_to(output_type, ["str"]))
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by Dottxt. "
            "Consider using a local mode instead."
        )

`from_dottxt(client, model=None)`

Create an Outlines Dottxt or AsyncDottxt model instance from a dottxt.DotTxt or dottxt.AsyncDotTxt client instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[DotTxt, AsyncDotTxt]`	A `dottxt.DotTxt` or `dottxt.AsyncDotTxt` client instance.	required
`model`	`Optional[str]`	The model identifier to use (e.g. `"dottxt/dottxt-v1-alpha"`).	`None`

Returns:

Type	Description
`Union[Dottxt, AsyncDottxt]`	An Outlines `Dottxt` or `AsyncDottxt` model instance.

Source code in outlines/models/dottxt.py

def from_dottxt(
    client: "Union[DottxtClient, AsyncDottxtClient]",
    model: Optional[str] = None,
) -> Union[Dottxt, AsyncDottxt]:
    """Create an Outlines `Dottxt` or `AsyncDottxt` model instance from a
    `dottxt.DotTxt` or `dottxt.AsyncDotTxt` client instance.

    Parameters
    ----------
    client
        A `dottxt.DotTxt` or `dottxt.AsyncDotTxt` client instance.
    model
        The model identifier to use (e.g. ``"dottxt/dottxt-v1-alpha"``).

    Returns
    -------
    Union[Dottxt, AsyncDottxt]
        An Outlines `Dottxt` or `AsyncDottxt` model instance.

    """
    from dottxt import AsyncDotTxt, DotTxt

    if isinstance(client, DotTxt):
        return Dottxt(client, model)
    elif isinstance(client, AsyncDotTxt):
        return AsyncDottxt(client, model)
    else:
        raise ValueError(
            "Invalid client type. The client must be an instance of "
            "`dottxt.DotTxt` or `dottxt.AsyncDotTxt`."
        )

`gemini`

Integration with Gemini's API.

`Gemini`

Bases: Model

Thin wrapper around the google.genai.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the google.genai.Client client.

Source code in outlines/models/gemini.py

class Gemini(Model):
    """Thin wrapper around the `google.genai.Client` client.

    This wrapper is used to convert the input and output types specified by
    the users at a higher level to arguments to the `google.genai.Client`
    client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            A `google.genai.Client` instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = GeminiTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> str:
        """Generate a response from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        with normalize_provider_errors(PROVIDER):
            completion = self.client.models.generate_content(
                **contents,
                model=inference_kwargs.pop("model", self.model_name),
                config={**generation_config, **inference_kwargs}
            )

        return completion.text

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "Gemini does not support batch generation."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Generate a stream of responses from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        with normalize_provider_errors(PROVIDER):
            stream = self.client.models.generate_content_stream(
                **contents,
                model=inference_kwargs.pop("model", self.model_name),
                config={**generation_config, **inference_kwargs},
            )
            for chunk in stream:
                if hasattr(chunk, "text") and chunk.text:
                    yield chunk.text

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Client`	A `google.genai.Client` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/gemini.py

def __init__(self, client: "Client", model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = GeminiTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate a response from the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type.	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The response generated by the model.

Source code in outlines/models/gemini.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> str:
    """Generate a response from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    generation_config = self.type_adapter.format_output_type(output_type)

    with normalize_provider_errors(PROVIDER):
        completion = self.client.models.generate_content(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs}
        )

    return completion.text

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Generate a stream of responses from the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type.	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/gemini.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Generate a stream of responses from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    generation_config = self.type_adapter.format_output_type(output_type)

    with normalize_provider_errors(PROVIDER):
        stream = self.client.models.generate_content_stream(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs},
        )
        for chunk in stream:
            if hasattr(chunk, "text") and chunk.text:
                yield chunk.text

`GeminiTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the Gemini model.

GeminiTypeAdapter is responsible for preparing the arguments to Gemini's client models.generate_content method: the input (prompt and possibly image), as well as the output type (either JSON or multiple choice).

Source code in outlines/models/gemini.py

class GeminiTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Gemini` model.

    `GeminiTypeAdapter` is responsible for preparing the arguments to Gemini's
    client `models.generate_content` method: the input (prompt and possibly
    image), as well as the output type (either JSON or multiple choice).

    """

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the `contents` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        dict
            The `contents` argument to pass to the client.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "Gemini. The only available types are `str`, `list` and `Chat` "
            "(containing a prompt and images)."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> dict:
        return {"contents": [self._create_text_part(model_input)]}

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> dict:
        return {
            "contents": [
                self._create_message("user", model_input)
            ]
        }

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> dict:
        """Generate the `contents` argument to pass to the client when the user
        passes a Chat instance.

        """
        return {
            "contents": [
                self._create_message(message["role"], message["content"])
                for message in model_input.messages
            ]
        }

    def _create_message(self, role: str, content: str | list) -> dict:
        """Create a message."""

        # Gemini uses "model" instead of "assistant"
        if role == "assistant":
            role = "model"

        if isinstance(content, str):
            return {
                "role": role,
                "parts": [self._create_text_part(content)],
            }

        elif isinstance(content, list):
            prompt = content[0]
            images = content[1:]

            if not all(isinstance(image, Image) for image in images):
                raise ValueError("All assets provided must be of type Image")

            image_parts = [
                self._create_img_part(image)
                for image in images
            ]

            return {
                "role": role,
                "parts": [
                    self._create_text_part(prompt),
                    *image_parts,
                ],
            }

        else:
            raise ValueError(
                f"Invalid content type: {type(content)}. "
                "The content must be a string or a list containing a string "
                "and a list of images."
            )

        return {"contents": [prompt, *image_parts]}


    def _create_text_part(self, text: str) -> dict:
        """Create a text input part for a message."""
        return {
            "text": text,
        }

    def _create_img_part(self, image: Image) -> dict:
        """Create an image input part for a message."""
        return {
            "inline_data": {
                "mime_type": image.image_format,
                "data": image.image_str,
            }
        }

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the `generation_config` argument to pass to the client.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        dict
            The `generation_config` argument to pass to the client.

        """

        # Unsupported output pytes
        if isinstance(output_type, Regex):
            raise TypeError(
                "Neither regex-based structured outputs nor the `pattern` "
                "keyword in Json Schema are available with Gemini. Use an "
                "open source model or dottxt instead."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not available with Gemini. "
                "Use an open source model or dottxt instead."
            )

        if output_type is None:
            return {}

        # JSON schema types
        elif JsonSchema.is_json_schema(output_type):
            return self.format_json_output_type(
                JsonSchema.convert_to(
                    output_type,
                    ["dataclass", "typeddict", "pydantic"]
                )
            )

        # List of structured types
        elif is_typing_list(output_type):
            return self.format_list_output_type(output_type)

        # Multiple choice types
        elif is_enum(output_type):
            return self.format_enum_output_type(output_type)
        elif is_literal(output_type):
            enum = get_enum_from_literal(output_type)
            return self.format_enum_output_type(enum)
        elif isinstance(output_type, Choice):
            enum = get_enum_from_choice(output_type)
            return self.format_enum_output_type(enum)

        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by Gemini. "
                "Consider using a local model or dottxt instead."
            )

    def format_enum_output_type(self, output_type: Optional[Any]) -> dict:
        return {
            "response_mime_type": "text/x.enum",
            "response_schema": output_type,
        }

    def format_json_output_type(self, output_type: Optional[Any]) -> dict:
        return {
            "response_mime_type": "application/json",
            "response_schema": output_type,
        }

    def format_list_output_type(self, output_type: Optional[Any]) -> dict:
        args = get_args(output_type)

        if len(args) == 1:
            item_type = args[0]

            if JsonSchema.is_json_schema(item_type):
                return {
                    "response_mime_type": "application/json",
                    "response_schema": list[  # type: ignore
                        JsonSchema.convert_to(
                            item_type,
                            ["dataclass", "typeddict", "pydantic"]
                        )
                    ],
                }
            else:
                raise TypeError(
                    "The list items output type must contain a JSON schema "
                    "type."
                )

        raise TypeError(
            f"Gemini only supports homogeneous lists: "
            "list[BaseModel], list[TypedDict] or list[dataclass]. "
            f"Got {output_type} instead."
        )

`format_chat_model_input(model_input)`

Generate the contents argument to pass to the client when the user passes a Chat instance.

Source code in outlines/models/gemini.py

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> dict:
    """Generate the `contents` argument to pass to the client when the user
    passes a Chat instance.

    """
    return {
        "contents": [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]
    }

`format_input(model_input)`

Generate the contents argument to pass to the client.

Parameters:

Name	Type	Description	Default
`model_input`		The input provided by the user.	required

Returns:

Type	Description
`dict`	The `contents` argument to pass to the client.

Source code in outlines/models/gemini.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the `contents` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    dict
        The `contents` argument to pass to the client.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "Gemini. The only available types are `str`, `list` and `Chat` "
        "(containing a prompt and images)."
    )

`format_output_type(output_type=None)`

Generate the generation_config argument to pass to the client.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`

Returns:

Type	Description
`dict`	The `generation_config` argument to pass to the client.

Source code in outlines/models/gemini.py

def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the `generation_config` argument to pass to the client.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    dict
        The `generation_config` argument to pass to the client.

    """

    # Unsupported output pytes
    if isinstance(output_type, Regex):
        raise TypeError(
            "Neither regex-based structured outputs nor the `pattern` "
            "keyword in Json Schema are available with Gemini. Use an "
            "open source model or dottxt instead."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not available with Gemini. "
            "Use an open source model or dottxt instead."
        )

    if output_type is None:
        return {}

    # JSON schema types
    elif JsonSchema.is_json_schema(output_type):
        return self.format_json_output_type(
            JsonSchema.convert_to(
                output_type,
                ["dataclass", "typeddict", "pydantic"]
            )
        )

    # List of structured types
    elif is_typing_list(output_type):
        return self.format_list_output_type(output_type)

    # Multiple choice types
    elif is_enum(output_type):
        return self.format_enum_output_type(output_type)
    elif is_literal(output_type):
        enum = get_enum_from_literal(output_type)
        return self.format_enum_output_type(enum)
    elif isinstance(output_type, Choice):
        enum = get_enum_from_choice(output_type)
        return self.format_enum_output_type(enum)

    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by Gemini. "
            "Consider using a local model or dottxt instead."
        )

`from_gemini(client, model_name=None)`

Create an Outlines Gemini model instance from a google.genai.Client instance.

Parameters:

Name	Type	Description	Default
`client`	`Client`	A `google.genai.Client` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Gemini`	An Outlines `Gemini` model instance.

Source code in outlines/models/gemini.py

def from_gemini(client: "Client", model_name: Optional[str] = None) -> Gemini:
    """Create an Outlines `Gemini` model instance from a
    `google.genai.Client` instance.

    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Gemini
        An Outlines `Gemini` model instance.

    """
    return Gemini(client, model_name)

`llamacpp`

Integration with the llama-cpp-python library.

`LlamaCpp`

Bases: Model

Thin wrapper around the llama_cpp.Llama model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the llama_cpp.Llama model.

Source code in outlines/models/llamacpp.py

class LlamaCpp(Model):
    """Thin wrapper around the `llama_cpp.Llama` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `llama_cpp.Llama` model.
    """

    tensor_library_name = "numpy"

    def __init__(self, model: "Llama", chat_mode: bool = True):
        """
        Parameters
        ----------
        model
            A `llama_cpp.Llama` model instance.
        chat_mode
            Whether to enable chat mode. If `False`, the model will regard
            all `str` inputs as plain text prompts. If `True`, the model will
            regard all `str` inputs as user messages in a chat conversation.

        """
        self.model = model
        self.tokenizer = LlamaCppTokenizer(self.model)

        # Note: llama-cpp-python provides a default chat-template fallback even when
        # the user hasn't explicitly configured one:
        # https://github.com/abetlen/llama-cpp-python/blob/c37132b/llama_cpp/llama.py#L540-L545
        # We keep the default as True because the upstream library generally favors chat-style usage.
        self.type_adapter = LlamaCppTypeAdapter(has_chat_template=chat_mode)

    def generate(
        self,
        model_input: Union[Chat, str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        prompt = self.type_adapter.format_input(model_input)

        if isinstance(prompt, str):
            completion = self.model(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                **inference_kwargs,
            )
            result = completion["choices"][0]["text"]
        elif isinstance(prompt, list):
            completion = self.model.create_chat_completion(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                **inference_kwargs,
            )
            result = completion["choices"][0]["message"]["content"]
        else:  # Never reached  # pragma: no cover
            raise ValueError("Unexpected prompt type.")

        self.model.reset()

        return result

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("LlamaCpp does not support batch generation.")

    def generate_stream(
        self,
        model_input: Union[Chat, str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        prompt = self.type_adapter.format_input(model_input)

        if isinstance(prompt, str):
            generator = self.model(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                stream=True,
                **inference_kwargs,
            )
            for chunk in generator:
                yield chunk["choices"][0]["text"]

        elif isinstance(prompt, list):
            generator = self.model.create_chat_completion(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                stream=True,
                **inference_kwargs,
            )
            for chunk in generator:
                yield chunk["choices"][0]["delta"].get("content", "")
        else:  # Never reached  # pragma: no cover
            raise ValueError("Unexpected prompt type.")

`init(model, chat_mode=True)`

Parameters:

Name	Type	Description	Default
`model`	`Llama`	A `llama_cpp.Llama` model instance.	required
`chat_mode`	`bool`	Whether to enable chat mode. If `False`, the model will regard all `str` inputs as plain text prompts. If `True`, the model will regard all `str` inputs as user messages in a chat conversation.	`True`

Source code in outlines/models/llamacpp.py

def __init__(self, model: "Llama", chat_mode: bool = True):
    """
    Parameters
    ----------
    model
        A `llama_cpp.Llama` model instance.
    chat_mode
        Whether to enable chat mode. If `False`, the model will regard
        all `str` inputs as plain text prompts. If `True`, the model will
        regard all `str` inputs as user messages in a chat conversation.

    """
    self.model = model
    self.tokenizer = LlamaCppTokenizer(self.model)

    # Note: llama-cpp-python provides a default chat-template fallback even when
    # the user hasn't explicitly configured one:
    # https://github.com/abetlen/llama-cpp-python/blob/c37132b/llama_cpp/llama.py#L540-L545
    # We keep the default as True because the upstream library generally favors chat-style usage.
    self.type_adapter = LlamaCppTypeAdapter(has_chat_template=chat_mode)

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using llama-cpp-python.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the `Llama.__call__` method of the `llama-cpp-python` library.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/llamacpp.py

def generate(
    self,
    model_input: Union[Chat, str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    prompt = self.type_adapter.format_input(model_input)

    if isinstance(prompt, str):
        completion = self.model(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        result = completion["choices"][0]["text"]
    elif isinstance(prompt, list):
        completion = self.model.create_chat_completion(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        result = completion["choices"][0]["message"]["content"]
    else:  # Never reached  # pragma: no cover
        raise ValueError("Unexpected prompt type.")

    self.model.reset()

    return result

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using llama-cpp-python.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the `Llama.__call__` method of the `llama-cpp-python` library.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/llamacpp.py

def generate_stream(
    self,
    model_input: Union[Chat, str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    prompt = self.type_adapter.format_input(model_input)

    if isinstance(prompt, str):
        generator = self.model(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            stream=True,
            **inference_kwargs,
        )
        for chunk in generator:
            yield chunk["choices"][0]["text"]

    elif isinstance(prompt, list):
        generator = self.model.create_chat_completion(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            stream=True,
            **inference_kwargs,
        )
        for chunk in generator:
            yield chunk["choices"][0]["delta"].get("content", "")
    else:  # Never reached  # pragma: no cover
        raise ValueError("Unexpected prompt type.")

`LlamaCppTokenizer`

Bases: Tokenizer

Source code in outlines/models/llamacpp.py

class LlamaCppTokenizer(Tokenizer):
    def __init__(self, model: "Llama"):
        self.tokenizer = model.tokenizer()
        self.special_tokens: Set[str] = set()
        self.vocabulary: Dict[str, int] = dict()

        # TODO: Remove when https://github.com/ggerganov/llama.cpp/pull/5613
        # is resolved
        self._hf_tokenizer = None
        if (
            hasattr(model, "tokenizer_")
            and hasattr(model.tokenizer_, "hf_tokenizer")
        ):
            self._hf_tokenizer = model.tokenizer_.hf_tokenizer
            self.eos_token_id = self._hf_tokenizer.eos_token_id
            self.eos_token = self._hf_tokenizer.eos_token
            self.vocabulary = self._hf_tokenizer.get_vocab()
        else:
            from llama_cpp import (
                llama_model_get_vocab,
                llama_token_to_piece,
            )

            self.eos_token_id = model.token_eos()
            size = 32
            buffer = (ctypes.c_char * size)()
            vocab = llama_model_get_vocab(model.model)
            for i in range(model.n_vocab()):
                n = llama_token_to_piece(
                    vocab,
                    i,
                    buffer,
                    size,
                    0,
                    True
                )
                # n < 0 is an error return from llama_token_to_piece;
                # skip invalid tokens so they don't pollute the vocabulary.
                if n < 0:
                    continue
                # n > size means the piece was truncated; retry with a
                # larger buffer so distinct tokens are not collapsed.
                if n > size:
                    big = (ctypes.c_char * n)()
                    llama_token_to_piece(vocab, i, big, n, 0, True)
                    token_piece = big[:n].decode("utf-8", errors="replace")  # type: ignore
                else:
                    token_piece = buffer[:n].decode("utf-8", errors="replace")  # type: ignore
                self.vocabulary[token_piece] = i
                if i == self.eos_token_id:
                    self.eos_token = token_piece

        self.pad_token_id = self.eos_token_id
        # ensure stable ordering of vocabulary
        self.vocabulary = {
            tok: tok_id
            for tok, tok_id
            in sorted(self.vocabulary.items(), key=lambda x: x[1])
        }
        self._hash = None

    def decode(self, token_ids: List[int]) -> List[str]:
        decoded_bytes = self.tokenizer.detokenize(token_ids)
        return [decoded_bytes.decode("utf-8", errors="ignore")]

    def encode(
        self,
        prompt: Union[str, List[str]],
        add_bos: bool = True,
        special: bool = True,
    ) -> Tuple[List[int], List[int]]:
        if isinstance(prompt, list):
            raise NotImplementedError(
                "llama-cpp-python tokenizer doesn't support batch tokenization"
            )
        token_ids = self.tokenizer.tokenize(
            prompt.encode("utf-8", errors="ignore"),
            add_bos=add_bos,
            special=special,
        )
        # generate attention mask, missing from llama-cpp-python.
        # For a single (non-batched) prompt there is no real padding, so
        # every token — including EOS when it appears inside the prompt —
        # should be attended.  We therefore always set the mask to 1.
        attention_mask = [1] * len(token_ids)
        return token_ids, attention_mask

    def convert_token_to_string(self, token: str) -> str:
        if self._hf_tokenizer is not None:
            token_str = self._hf_tokenizer.convert_tokens_to_string([token])
            if (
                token.startswith(SPIECE_UNDERLINE)
                or token == "<0x20>"
            ):  # pragma: no cover
                token_str = " " + token_str
            return token_str
        else:
            return token

    def __eq__(self, other):
        if not isinstance(other, LlamaCppTokenizer):
            return False
        return self.__getstate__() == other.__getstate__()

    def __hash__(self):
        # We create a custom hash as pickle.dumps(self) is not stable
        if self._hash is None:
            self._hash = hash((
                tuple(sorted(self.vocabulary.items())),
                self.eos_token_id,
                self.eos_token,
                self.pad_token_id,
                tuple(sorted(self.special_tokens)),
            ))
        return self._hash

    def __getstate__(self):
        """Create a stable representation for outlines.caching"""
        return (
            self.vocabulary,
            self.eos_token_id,
            self.eos_token,
            self.pad_token_id,
            sorted(self.special_tokens),
        )

    def __setstate__(self, state):
        raise NotImplementedError("Cannot load a pickled llamacpp tokenizer")

`getstate()`

Create a stable representation for outlines.caching

Source code in outlines/models/llamacpp.py

def __getstate__(self):
    """Create a stable representation for outlines.caching"""
    return (
        self.vocabulary,
        self.eos_token_id,
        self.eos_token,
        self.pad_token_id,
        sorted(self.special_tokens),
    )

`LlamaCppTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the LlamaCpp model.

LlamaCppTypeAdapter is responsible for preparing the arguments to the Llama object text generation methods.

Source code in outlines/models/llamacpp.py

class LlamaCppTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `LlamaCpp` model.

    `LlamaCppTypeAdapter` is responsible for preparing the arguments to
    the `Llama` object text generation methods.

    """

    def __init__(self, has_chat_template: bool = False):
        """
        Parameters
        ----------
        has_chat_template
            Whether the model has a chat template defined.
        """
        self.has_chat_template = has_chat_template

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {type(model_input)} is not available with "
            "LlamaCpp. The only available types are `str` and `Chat`."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str | list:
        if self.has_chat_template:
            return [{"role": "user", "content": model_input}]
        return model_input

    @format_input.register(Chat)
    def format_chat_input(self, model_input: Chat) -> list:
        if not all(
            isinstance(message["content"], str)
            for message in model_input.messages
        ):
            raise ValueError(
                "LlamaCpp does not support multi-modal messages."
                + "The content of each message must be a string."
            )

        return  [
            {
                "role": message["role"],
                "content": message["content"],
            }
            for message in model_input.messages
        ]

    def format_output_type(
        self, output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional["LogitsProcessorList"]:
        """Generate the logits processor argument to pass to the model.

        Parameters
        ----------
        output_type
            The logits processor provided.

        Returns
        -------
        LogitsProcessorList
            The logits processor to pass to the model.

        """
        from llama_cpp import LogitsProcessorList

        if output_type is not None:
            return LogitsProcessorList([output_type])
        return None

`init(has_chat_template=False)`

Parameters:

Name	Type	Description	Default
`has_chat_template`	`bool`	Whether the model has a chat template defined.	`False`

Source code in outlines/models/llamacpp.py

def __init__(self, has_chat_template: bool = False):
    """
    Parameters
    ----------
    has_chat_template
        Whether the model has a chat template defined.
    """
    self.has_chat_template = has_chat_template

`format_input(model_input)`

Generate the prompt argument to pass to the model.

Parameters:

Name	Type	Description	Default
`model_input`		The input provided by the user.	required

Returns:

Type	Description
`str`	The formatted input to be passed to the model.

Source code in outlines/models/llamacpp.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {type(model_input)} is not available with "
        "LlamaCpp. The only available types are `str` and `Chat`."
    )

`format_output_type(output_type=None)`

Generate the logits processor argument to pass to the model.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor provided.	`None`

Returns:

Type	Description
`LogitsProcessorList`	The logits processor to pass to the model.

Source code in outlines/models/llamacpp.py

def format_output_type(
    self, output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional["LogitsProcessorList"]:
    """Generate the logits processor argument to pass to the model.

    Parameters
    ----------
    output_type
        The logits processor provided.

    Returns
    -------
    LogitsProcessorList
        The logits processor to pass to the model.

    """
    from llama_cpp import LogitsProcessorList

    if output_type is not None:
        return LogitsProcessorList([output_type])
    return None

`from_llamacpp(model, chat_mode=True)`

Create an Outlines LlamaCpp model instance from a llama_cpp.Llama instance.

Parameters:

Name	Type	Description	Default
`model`	`Llama`	A `llama_cpp.Llama` instance.	required
`chat_mode`	`bool`	Whether to enable chat mode. If `False`, the model will regard all `str` inputs as plain text prompts. If `True`, the model will regard all `str` inputs as user messages in a chat conversation.	`True`

Returns:

Type	Description
`LlamaCpp`	An Outlines `LlamaCpp` model instance.

Source code in outlines/models/llamacpp.py

def from_llamacpp(model: "Llama", chat_mode: bool = True) -> LlamaCpp:
    """Create an Outlines `LlamaCpp` model instance from a
    `llama_cpp.Llama` instance.

    Parameters
    ----------
    model
        A `llama_cpp.Llama` instance.
    chat_mode
        Whether to enable chat mode. If `False`, the model will regard
        all `str` inputs as plain text prompts. If `True`, the model will
        regard all `str` inputs as user messages in a chat conversation.

    Returns
    -------
    LlamaCpp
        An Outlines `LlamaCpp` model instance.

    """
    return LlamaCpp(model, chat_mode=chat_mode)

`lmstudio`

Integration with the lmstudio library.

Local runtime calls intentionally bypass outlines.exceptions.normalize_provider_errors().

`AsyncLMStudio`

Bases: AsyncModel

Thin wrapper around a lmstudio.AsyncClient client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the LMStudio async client.

Source code in outlines/models/lmstudio.py

class AsyncLMStudio(AsyncModel):
    """Thin wrapper around a `lmstudio.AsyncClient` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the LMStudio async client.

    """

    def __init__(
        self, client: "AsyncClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            A LMStudio AsyncClient instance.
        model_name
            The name of the model to use. If not provided, uses the default
            loaded model in LMStudio.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = LMStudioTypeAdapter()
        self._context_entered = False

    async def close(self) -> None:
        """Close the async client and release resources."""
        if self._context_entered:
            await self.client.__aexit__(None, None, None)
            self._context_entered = False

    async def generate(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using LMStudio asynchronously.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        str
            The text generated by the model.

        """
        if not self._context_entered:
            await self.client.__aenter__()
            self._context_entered = True

        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        model_key = kwargs.pop("model", None)
        model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()

        formatted_input = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if response_format is not None:
            kwargs["response_format"] = response_format

        result = await model.respond(formatted_input, **kwargs)
        return result.content

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `lmstudio` library does not support batch inference."
        )

    async def generate_stream(  # type: ignore
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using LMStudio asynchronously.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        if not self._context_entered:
            await self.client.__aenter__()
            self._context_entered = True

        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        model_key = kwargs.pop("model", None)
        model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()

        formatted_input = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if response_format is not None:
            kwargs["response_format"] = response_format

        stream = await model.respond_stream(formatted_input, **kwargs)
        async for fragment in stream:
            yield fragment.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`AsyncClient`	A LMStudio AsyncClient instance.	required
`model_name`	`Optional[str]`	The name of the model to use. If not provided, uses the default loaded model in LMStudio.	`None`

Source code in outlines/models/lmstudio.py

def __init__(
    self, client: "AsyncClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client
        A LMStudio AsyncClient instance.
    model_name
        The name of the model to use. If not provided, uses the default
        loaded model in LMStudio.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = LMStudioTypeAdapter()
    self._context_entered = False

`close()` `async`

Close the async client and release resources.

Source code in outlines/models/lmstudio.py

async def close(self) -> None:
    """Close the async client and release resources."""
    if self._context_entered:
        await self.client.__aexit__(None, None, None)
        self._context_entered = False

`generate(model_input, output_type=None, **kwargs)` `async`

Generate text using LMStudio asynchronously.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/lmstudio.py

async def generate(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using LMStudio asynchronously.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    str
        The text generated by the model.

    """
    if not self._context_entered:
        await self.client.__aenter__()
        self._context_entered = True

    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if response_format is not None:
        kwargs["response_format"] = response_format

    result = await model.respond(formatted_input, **kwargs)
    return result.content

`generate_stream(model_input, output_type=None, **kwargs)` `async`

Stream text using LMStudio asynchronously.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`AsyncIterator[str]`	An async iterator that yields the text generated by the model.

Source code in outlines/models/lmstudio.py

async def generate_stream(  # type: ignore
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using LMStudio asynchronously.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    if not self._context_entered:
        await self.client.__aenter__()
        self._context_entered = True

    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if response_format is not None:
        kwargs["response_format"] = response_format

    stream = await model.respond_stream(formatted_input, **kwargs)
    async for fragment in stream:
        yield fragment.content

`LMStudio`

Bases: Model

Thin wrapper around a lmstudio.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the LMStudio client.

Source code in outlines/models/lmstudio.py

class LMStudio(Model):
    """Thin wrapper around a `lmstudio.Client` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the LMStudio client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            A LMStudio Client instance obtained via `lmstudio.Client()` or
            `lmstudio.get_default_client()`.
        model_name
            The name of the model to use. If not provided, uses the default
            loaded model in LMStudio.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = LMStudioTypeAdapter()

    def generate(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using LMStudio.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        str
            The text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        model_key = kwargs.pop("model", None)
        model = self.client.llm.model(model_key) if model_key else self.client.llm.model()

        formatted_input = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if response_format is not None:
            kwargs["response_format"] = response_format

        result = model.respond(formatted_input, **kwargs)
        return result.content

    def generate_batch(
        self,
        model_input,
        output_type=None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `lmstudio` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using LMStudio.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        model_key = kwargs.pop("model", None)
        model = self.client.llm.model(model_key) if model_key else self.client.llm.model()

        formatted_input = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if response_format is not None:
            kwargs["response_format"] = response_format

        stream = model.respond_stream(formatted_input, **kwargs)
        for fragment in stream:
            yield fragment.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Client`	A LMStudio Client instance obtained via `lmstudio.Client()` or `lmstudio.get_default_client()`.	required
`model_name`	`Optional[str]`	The name of the model to use. If not provided, uses the default loaded model in LMStudio.	`None`

Source code in outlines/models/lmstudio.py

def __init__(self, client: "Client", model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        A LMStudio Client instance obtained via `lmstudio.Client()` or
        `lmstudio.get_default_client()`.
    model_name
        The name of the model to use. If not provided, uses the default
        loaded model in LMStudio.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = LMStudioTypeAdapter()

`generate(model_input, output_type=None, **kwargs)`

Generate text using LMStudio.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/lmstudio.py

def generate(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using LMStudio.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    str
        The text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    model = self.client.llm.model(model_key) if model_key else self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if response_format is not None:
        kwargs["response_format"] = response_format

    result = model.respond(formatted_input, **kwargs)
    return result.content

`generate_stream(model_input, output_type=None, **kwargs)`

Stream text using LMStudio.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the model.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/lmstudio.py

def generate_stream(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> Iterator[str]:
    """Stream text using LMStudio.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    model = self.client.llm.model(model_key) if model_key else self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if response_format is not None:
        kwargs["response_format"] = response_format

    stream = model.respond_stream(formatted_input, **kwargs)
    for fragment in stream:
        yield fragment.content

`LMStudioTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the LMStudio model.

Source code in outlines/models/lmstudio.py

class LMStudioTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `LMStudio` model."""

    def _prepare_lmstudio_image(self, image: Image):
        """Convert Outlines Image to LMStudio image handle.

        LMStudio's SDK only accepts file paths, raw bytes, or binary IO objects.
        Unlike Ollama which accepts base64 directly, we must decode from base64.
        """
        import base64

        import lmstudio as lms

        image_bytes = base64.b64decode(image.image_str)
        return lms.prepare_image(image_bytes)

    @singledispatchmethod
    def format_input(self, model_input):
        """Format input for LMStudio model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str | LMStudioChat
            The formatted input to be passed to the model.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "LMStudio. The only available types are `str`, `list` and `Chat`."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> str:
        """Pass through string input directly to LMStudio."""
        return model_input

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> "LMStudioChat":
        """Handle list input containing prompt and images."""
        from lmstudio import Chat as LMSChat

        prompt = model_input[0]
        images = model_input[1:]

        if not all(isinstance(img, Image) for img in images):
            raise ValueError("All assets provided must be of type Image")

        chat = LMSChat()
        image_handles = [self._prepare_lmstudio_image(img) for img in images]
        chat.add_user_message(prompt, images=image_handles)
        return chat

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> "LMStudioChat":
        """Convert Outlines Chat to LMStudio Chat with image support."""
        from lmstudio import Chat as LMSChat

        system_prompt = None
        messages = model_input.messages

        if messages and messages[0]["role"] == "system":
            system_prompt = messages[0]["content"]
            messages = messages[1:]

        chat = LMSChat(system_prompt) if system_prompt else LMSChat()

        for message in messages:
            role = message["role"]
            content = message["content"]

            if role == "user":
                if isinstance(content, str):
                    chat.add_user_message(content)
                elif isinstance(content, list):
                    prompt = content[0]
                    images = content[1:]
                    if not all(isinstance(img, Image) for img in images):
                        raise ValueError("All assets provided must be of type Image")
                    image_handles = [self._prepare_lmstudio_image(img) for img in images]
                    chat.add_user_message(prompt, images=image_handles)
                else:
                    raise ValueError(
                        f"Invalid content type: {type(content)}. "
                        "The content must be a string or a list containing a string "
                        "and a list of images."
                    )
            elif role == "assistant":
                chat.add_assistant_response(content)
            else:
                raise ValueError(f"Unsupported role: {role}")

        return chat

    def format_output_type(
        self, output_type: Optional[Any] = None
    ) -> Optional[dict]:
        """Format the output type to pass to the model.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        Optional[dict]
            The formatted output type (JSON schema) to be passed to the model.

        """
        if output_type is None:
            return None
        elif isinstance(output_type, Regex):
            raise TypeError(
                "Regex-based structured outputs are not supported by LMStudio. "
                "Use an open source model in the meantime."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not supported by LMStudio. "
                "Use an open source model in the meantime."
            )
        elif JsonSchema.is_json_schema(output_type):
            return cast(dict, JsonSchema.convert_to(output_type, ["dict"]))
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by LMStudio. "
                "Consider using a local model instead."
            )

`format_chat_model_input(model_input)`

Convert Outlines Chat to LMStudio Chat with image support.

Source code in outlines/models/lmstudio.py

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> "LMStudioChat":
    """Convert Outlines Chat to LMStudio Chat with image support."""
    from lmstudio import Chat as LMSChat

    system_prompt = None
    messages = model_input.messages

    if messages and messages[0]["role"] == "system":
        system_prompt = messages[0]["content"]
        messages = messages[1:]

    chat = LMSChat(system_prompt) if system_prompt else LMSChat()

    for message in messages:
        role = message["role"]
        content = message["content"]

        if role == "user":
            if isinstance(content, str):
                chat.add_user_message(content)
            elif isinstance(content, list):
                prompt = content[0]
                images = content[1:]
                if not all(isinstance(img, Image) for img in images):
                    raise ValueError("All assets provided must be of type Image")
                image_handles = [self._prepare_lmstudio_image(img) for img in images]
                chat.add_user_message(prompt, images=image_handles)
            else:
                raise ValueError(
                    f"Invalid content type: {type(content)}. "
                    "The content must be a string or a list containing a string "
                    "and a list of images."
                )
        elif role == "assistant":
            chat.add_assistant_response(content)
        else:
            raise ValueError(f"Unsupported role: {role}")

    return chat

`format_input(model_input)`

Format input for LMStudio model.

Parameters:

Name	Type	Description	Default
`model_input`		The input provided by the user.	required

Returns:

Type	Description
`str \| Chat`	The formatted input to be passed to the model.

Source code in outlines/models/lmstudio.py

@singledispatchmethod
def format_input(self, model_input):
    """Format input for LMStudio model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str | LMStudioChat
        The formatted input to be passed to the model.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "LMStudio. The only available types are `str`, `list` and `Chat`."
    )

`format_list_model_input(model_input)`

Handle list input containing prompt and images.

Source code in outlines/models/lmstudio.py

@format_input.register(list)
def format_list_model_input(self, model_input: list) -> "LMStudioChat":
    """Handle list input containing prompt and images."""
    from lmstudio import Chat as LMSChat

    prompt = model_input[0]
    images = model_input[1:]

    if not all(isinstance(img, Image) for img in images):
        raise ValueError("All assets provided must be of type Image")

    chat = LMSChat()
    image_handles = [self._prepare_lmstudio_image(img) for img in images]
    chat.add_user_message(prompt, images=image_handles)
    return chat

`format_output_type(output_type=None)`

Format the output type to pass to the model.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`

Returns:

Type	Description
`Optional[dict]`	The formatted output type (JSON schema) to be passed to the model.

Source code in outlines/models/lmstudio.py

def format_output_type(
    self, output_type: Optional[Any] = None
) -> Optional[dict]:
    """Format the output type to pass to the model.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    Optional[dict]
        The formatted output type (JSON schema) to be passed to the model.

    """
    if output_type is None:
        return None
    elif isinstance(output_type, Regex):
        raise TypeError(
            "Regex-based structured outputs are not supported by LMStudio. "
            "Use an open source model in the meantime."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not supported by LMStudio. "
            "Use an open source model in the meantime."
        )
    elif JsonSchema.is_json_schema(output_type):
        return cast(dict, JsonSchema.convert_to(output_type, ["dict"]))
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by LMStudio. "
            "Consider using a local model instead."
        )

`format_str_model_input(model_input)`

Pass through string input directly to LMStudio.

Source code in outlines/models/lmstudio.py

@format_input.register(str)
def format_str_model_input(self, model_input: str) -> str:
    """Pass through string input directly to LMStudio."""
    return model_input

`from_lmstudio(client, model_name=None)`

Create an Outlines LMStudio model instance from a lmstudio.Client or lmstudio.AsyncClient instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[Client, AsyncClient]`	A `lmstudio.Client` or `lmstudio.AsyncClient` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Union[LMStudio, AsyncLMStudio]`	An Outlines `LMStudio` or `AsyncLMStudio` model instance.

Source code in outlines/models/lmstudio.py

def from_lmstudio(
    client: Union["Client", "AsyncClient"],
    model_name: Optional[str] = None,
) -> Union[LMStudio, AsyncLMStudio]:
    """Create an Outlines `LMStudio` model instance from a
    `lmstudio.Client` or `lmstudio.AsyncClient` instance.

    Parameters
    ----------
    client
        A `lmstudio.Client` or `lmstudio.AsyncClient` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[LMStudio, AsyncLMStudio]
        An Outlines `LMStudio` or `AsyncLMStudio` model instance.

    """
    from lmstudio import AsyncClient, Client

    if isinstance(client, Client):
        return LMStudio(client, model_name)
    elif isinstance(client, AsyncClient):
        return AsyncLMStudio(client, model_name)
    else:
        raise ValueError(
            "Invalid client type, the client must be an instance of "
            "`lmstudio.Client` or `lmstudio.AsyncClient`."
        )

`mistral`

Integration with Mistral AI API.

`AsyncMistral`

Bases: AsyncModel

Async thin wrapper around the mistralai.Mistral client.

Converts input and output types to arguments for the mistralai.Mistral client's async methods (chat.complete_async or chat.stream_async).

Source code in outlines/models/mistral.py

class AsyncMistral(AsyncModel):
    """Async thin wrapper around the `mistralai.Mistral` client.

    Converts input and output types to arguments for the `mistralai.Mistral`
    client's async methods (`chat.complete_async` or `chat.stream_async`).

    """

    def __init__(
        self, client: "MistralClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client : MistralClient
            A mistralai.Mistral client instance.
        model_name : Optional[str]
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = MistralTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate a response from the model asynchronously.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs : Any
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The response generated by the model as text.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            result = await self.client.chat.complete_async(
                messages=messages,
                response_format=response_format,
                stream=False,
                **inference_kwargs,
            )

        outputs = [choice.message for choice in result.choices]

        if len(outputs) == 1:
            return outputs[0].content
        else:
            return [m.content for m in outputs]

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The mistralai library does not support batch inference."
        )

    async def generate_stream(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        """Generate text from the model as an async stream of chunks.

        Parameters
        ----------
        model_input
            str, list, or chat input to generate from.
        output_type
            Optional type for structured output.
        **inference_kwargs
            Extra kwargs like "model" name.

        Yields
        ------
        str
            Chunks of text as they are streamed.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            response = await self.client.chat.stream_async(
                messages=messages,
                response_format=response_format,
                **inference_kwargs
            )
            async for chunk in response:
                if (
                    hasattr(chunk, "data")
                    and chunk.data.choices
                    and len(chunk.data.choices) > 0
                    and hasattr(chunk.data.choices[0], "delta")
                    and chunk.data.choices[0].delta.content is not None
                ):
                    yield chunk.data.choices[0].delta.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Mistral`	A mistralai.Mistral client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/mistral.py

def __init__(
    self, client: "MistralClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client : MistralClient
        A mistralai.Mistral client instance.
    model_name : Optional[str]
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = MistralTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate a response from the model asynchronously.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt or chat messages to generate a response from.	required
`output_type`	`Optional[Any]`	The desired format of the response (e.g., JSON schema).	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The response generated by the model as text.

Source code in outlines/models/mistral.py

async def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate a response from the model asynchronously.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs : Any
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The response generated by the model as text.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        result = await self.client.chat.complete_async(
            messages=messages,
            response_format=response_format,
            stream=False,
            **inference_kwargs,
        )

    outputs = [choice.message for choice in result.choices]

    if len(outputs) == 1:
        return outputs[0].content
    else:
        return [m.content for m in outputs]

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Generate text from the model as an async stream of chunks.

Parameters:

Name	Description	Default
`model_input`	str, list, or chat input to generate from.	required
`output_type`	Optional type for structured output.	`None`
`**inference_kwargs`	Extra kwargs like "model" name.	`{}`

Yields:

Type	Description
`str`	Chunks of text as they are streamed.

Source code in outlines/models/mistral.py

async def generate_stream(
    self,
    model_input,
    output_type=None,
    **inference_kwargs,
):
    """Generate text from the model as an async stream of chunks.

    Parameters
    ----------
    model_input
        str, list, or chat input to generate from.
    output_type
        Optional type for structured output.
    **inference_kwargs
        Extra kwargs like "model" name.

    Yields
    ------
    str
        Chunks of text as they are streamed.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        response = await self.client.chat.stream_async(
            messages=messages,
            response_format=response_format,
            **inference_kwargs
        )
        async for chunk in response:
            if (
                hasattr(chunk, "data")
                and chunk.data.choices
                and len(chunk.data.choices) > 0
                and hasattr(chunk.data.choices[0], "delta")
                and chunk.data.choices[0].delta.content is not None
            ):
                yield chunk.data.choices[0].delta.content

`Mistral`

Bases: Model

Thin wrapper around the mistralai.Mistral client.

Converts input and output types to arguments for the mistralai.Mistral client's chat.complete or chat.stream methods.

Source code in outlines/models/mistral.py

class Mistral(Model):
    """Thin wrapper around the `mistralai.Mistral` client.

    Converts input and output types to arguments for the `mistralai.Mistral`
    client's `chat.complete` or `chat.stream` methods.

    """

    def __init__(
        self, client: "MistralClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client : MistralClient
            A mistralai.Mistral client instance.
        model_name : Optional[str]
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = MistralTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate a response from the model.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs : Any
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The response generated by the model as text.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            result = self.client.chat.complete(
                messages=messages,
                response_format=response_format,
                **inference_kwargs,
            )

        outputs = [choice.message for choice in result.choices]

        if len(outputs) == 1:
            return outputs[0].content
        else:
            return [m.content for m in outputs]

    def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The `mistralai` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Generate a stream of responses from the model.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text chunks generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = self.client.chat.stream(
                messages=messages,
                response_format=response_format,
                **inference_kwargs
            )
            for chunk in stream:
                if (
                    hasattr(chunk, "data")
                    and chunk.data.choices
                    and chunk.data.choices[0].delta.content is not None
                ):
                    yield chunk.data.choices[0].delta.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Mistral`	A mistralai.Mistral client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/mistral.py

def __init__(
    self, client: "MistralClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client : MistralClient
        A mistralai.Mistral client instance.
    model_name : Optional[str]
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = MistralTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate a response from the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt or chat messages to generate a response from.	required
`output_type`	`Optional[Any]`	The desired format of the response (e.g., JSON schema).	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The response generated by the model as text.

Source code in outlines/models/mistral.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate a response from the model.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs : Any
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The response generated by the model as text.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        result = self.client.chat.complete(
            messages=messages,
            response_format=response_format,
            **inference_kwargs,
        )

    outputs = [choice.message for choice in result.choices]

    if len(outputs) == 1:
        return outputs[0].content
    else:
        return [m.content for m in outputs]

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Generate a stream of responses from the model.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt or chat messages to generate a response from.	required
`output_type`	`Optional[Any]`	The desired format of the response (e.g., JSON schema).	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text chunks generated by the model.

Source code in outlines/models/mistral.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Generate a stream of responses from the model.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text chunks generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = self.client.chat.stream(
            messages=messages,
            response_format=response_format,
            **inference_kwargs
        )
        for chunk in stream:
            if (
                hasattr(chunk, "data")
                and chunk.data.choices
                and chunk.data.choices[0].delta.content is not None
            ):
                yield chunk.data.choices[0].delta.content

`MistralTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the Mistral model.

Prepares arguments for Mistral's client chat.complete, chat.complete_async, or chat.stream methods. Handles input (prompt or chat messages) and output type (JSON schema types).

Source code in outlines/models/mistral.py

class MistralTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Mistral` model.

    Prepares arguments for Mistral's client `chat.complete`,
    `chat.complete_async`, or `chat.stream` methods. Handles input (prompt or
    chat messages) and output type (JSON schema types).
    """

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the `messages` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        list
            The `messages` argument to pass to the client.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "Mistral. The only available types are `str`, `list` and `Chat`."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> list:
        """Format a string input into a list of messages.

        Parameters
        ----------
        model_input : str
            The input string prompt.

        Returns
        -------
        list
            A list of Mistral message objects.

        """
        from mistralai import UserMessage

        return [UserMessage(content=model_input)]

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> list:
        """Format a list input into a list of messages.

        Parameters
        ----------
        model_input : list
            The input list, containing a string prompt and optionally Image
            objects (vision models only).

        Returns
        -------
        list
            A list of Mistral message objects.

        """
        from mistralai import UserMessage

        return [UserMessage(content=self._create_message_content(model_input))]

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> list:
        """Format a Chat input into a list of messages.

        Parameters
        ----------
        model_input : Chat
            The Chat object containing a list of message dictionaries.

        Returns
        -------
        list
            A list of Mistral message objects.

        """
        from mistralai import UserMessage, AssistantMessage, SystemMessage

        messages = []

        for message in model_input.messages:
            role = message["role"]
            content = message["content"]
            if role == "user":
                messages.append(
                    UserMessage(content=self._create_message_content(content))
                )
            elif role == "assistant":
                messages.append(AssistantMessage(content=content))
            elif role == "system":
                messages.append(SystemMessage(content=content))
            else:
                raise ValueError(f"Unsupported role: {role}")

        return messages

    def _create_message_content(
        self, content: Union[str, list]
    ) -> Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]:
        """Create message content from an input.

        Parameters
        ----------
        content : Union[str, list]
            The content to format, either a string or a list containing a
            string and optionally Image objects.

        Returns
        -------
        Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]
            The formatted content, either a string or a list of content parts
            (text and image URLs).

        """
        if isinstance(content, str):
            return content
        elif isinstance(content, list):
            if not content:
                raise ValueError("Content list cannot be empty.")
            if not isinstance(content[0], str):
                raise ValueError(
                    "The first item in the list should be a string."
                )
            if len(content) == 1:
                return content[0]
            content_parts: List[Dict[str, Union[str, Dict[str, str]]]] = [
                {"type": "text", "text": content[0]}
            ]
            for item in content[1:]:
                if isinstance(item, Image):
                    data_url = f"data:{item.image_format};base64,{item.image_str}"
                    content_parts.append({
                        "type": "image_url",
                        "image_url": {"url": data_url}
                    })
                else:
                    raise ValueError(
                        f"Invalid item type in content list: {type(item)}. "
                        + "Expected Image objects after the first string."
                    )
            return content_parts
        else:
            raise TypeError(
                f"Invalid content type: {type(content)}. "
                + "Content must be a string or a list starting with a string "
                + "followed by optional Image objects."
            )

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the `response_format` argument to pass to the client.

        Parameters
        ----------
        output_type : Optional[Any]
            The desired output type provided by the user.

        Returns
        -------
        dict
            The `response_format` dict to pass to the client.

        """
        if output_type is None:
            return {}

        # JSON schema types
        elif is_pydantic_model(output_type):
            schema = output_type.model_json_schema()
            return self.format_json_schema_type(schema, output_type.__name__)
        elif is_dataclass(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return self.format_json_schema_type(schema, output_type.__name__)
        elif is_typed_dict(output_type):
            schema = TypeAdapter(output_type).json_schema()
            return self.format_json_schema_type(schema, output_type.__name__)
        elif is_genson_schema_builder(output_type):
            schema = json.loads(output_type.to_json())
            return self.format_json_schema_type(schema)
        elif isinstance(output_type, JsonSchema):
            return self.format_json_schema_type(json.loads(output_type.schema))

        # Json mode
        elif is_native_dict(output_type):
            return {"type": "json_object"}

        # Unsupported types
        elif isinstance(output_type, Regex):
            raise TypeError(
                "Regex-based structured outputs are not available with "
                "Mistral."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not available with Mistral."
            )
        else:
            type_name = getattr(output_type, "__name__", str(output_type))
            raise TypeError(
                f"The type {type_name} is not available with Mistral."
            )

    def format_json_schema_type(
        self, schema: dict, schema_name: str = "default"
    ) -> dict:
        """Create the `response_format` argument to pass to the client from a
        JSON schema dictionary.

        Parameters
        ----------
        schema : dict
            The JSON schema to format.
        schema_name : str
            The name of the schema.

        Returns
        -------
        dict
            The value of the `response_format` argument to pass to the client.

        """
        schema = set_additional_properties_false_json_schema(schema)

        return {
            "type": "json_schema",
            "json_schema": {
                "schema": schema,
                "name": schema_name.lower(),
                "strict": True
            }
        }

`format_chat_model_input(model_input)`

Format a Chat input into a list of messages.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat`	The Chat object containing a list of message dictionaries.	required

Returns:

Type	Description
`list`	A list of Mistral message objects.

Source code in outlines/models/mistral.py

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> list:
    """Format a Chat input into a list of messages.

    Parameters
    ----------
    model_input : Chat
        The Chat object containing a list of message dictionaries.

    Returns
    -------
    list
        A list of Mistral message objects.

    """
    from mistralai import UserMessage, AssistantMessage, SystemMessage

    messages = []

    for message in model_input.messages:
        role = message["role"]
        content = message["content"]
        if role == "user":
            messages.append(
                UserMessage(content=self._create_message_content(content))
            )
        elif role == "assistant":
            messages.append(AssistantMessage(content=content))
        elif role == "system":
            messages.append(SystemMessage(content=content))
        else:
            raise ValueError(f"Unsupported role: {role}")

    return messages

`format_input(model_input)`

Generate the messages argument to pass to the client.

Parameters:

Name	Type	Description	Default
`model_input`		The input provided by the user.	required

Returns:

Type	Description
`list`	The `messages` argument to pass to the client.

Source code in outlines/models/mistral.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the `messages` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    list
        The `messages` argument to pass to the client.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "Mistral. The only available types are `str`, `list` and `Chat`."
    )

`format_json_schema_type(schema, schema_name='default')`

Create the response_format argument to pass to the client from a JSON schema dictionary.

Parameters:

Name	Type	Description	Default
`schema`	`dict`	The JSON schema to format.	required
`schema_name`	`str`	The name of the schema.	`'default'`

Returns:

Type	Description
`dict`	The value of the `response_format` argument to pass to the client.

Source code in outlines/models/mistral.py

def format_json_schema_type(
    self, schema: dict, schema_name: str = "default"
) -> dict:
    """Create the `response_format` argument to pass to the client from a
    JSON schema dictionary.

    Parameters
    ----------
    schema : dict
        The JSON schema to format.
    schema_name : str
        The name of the schema.

    Returns
    -------
    dict
        The value of the `response_format` argument to pass to the client.

    """
    schema = set_additional_properties_false_json_schema(schema)

    return {
        "type": "json_schema",
        "json_schema": {
            "schema": schema,
            "name": schema_name.lower(),
            "strict": True
        }
    }

`format_list_model_input(model_input)`

Format a list input into a list of messages.

Parameters:

Name	Type	Description	Default
`model_input`	`list`	The input list, containing a string prompt and optionally Image objects (vision models only).	required

Returns:

Type	Description
`list`	A list of Mistral message objects.

Source code in outlines/models/mistral.py

@format_input.register(list)
def format_list_model_input(self, model_input: list) -> list:
    """Format a list input into a list of messages.

    Parameters
    ----------
    model_input : list
        The input list, containing a string prompt and optionally Image
        objects (vision models only).

    Returns
    -------
    list
        A list of Mistral message objects.

    """
    from mistralai import UserMessage

    return [UserMessage(content=self._create_message_content(model_input))]

`format_output_type(output_type=None)`

Generate the response_format argument to pass to the client.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The desired output type provided by the user.	`None`

Returns:

Type	Description
`dict`	The `response_format` dict to pass to the client.

Source code in outlines/models/mistral.py

def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the `response_format` argument to pass to the client.

    Parameters
    ----------
    output_type : Optional[Any]
        The desired output type provided by the user.

    Returns
    -------
    dict
        The `response_format` dict to pass to the client.

    """
    if output_type is None:
        return {}

    # JSON schema types
    elif is_pydantic_model(output_type):
        schema = output_type.model_json_schema()
        return self.format_json_schema_type(schema, output_type.__name__)
    elif is_dataclass(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return self.format_json_schema_type(schema, output_type.__name__)
    elif is_typed_dict(output_type):
        schema = TypeAdapter(output_type).json_schema()
        return self.format_json_schema_type(schema, output_type.__name__)
    elif is_genson_schema_builder(output_type):
        schema = json.loads(output_type.to_json())
        return self.format_json_schema_type(schema)
    elif isinstance(output_type, JsonSchema):
        return self.format_json_schema_type(json.loads(output_type.schema))

    # Json mode
    elif is_native_dict(output_type):
        return {"type": "json_object"}

    # Unsupported types
    elif isinstance(output_type, Regex):
        raise TypeError(
            "Regex-based structured outputs are not available with "
            "Mistral."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not available with Mistral."
        )
    else:
        type_name = getattr(output_type, "__name__", str(output_type))
        raise TypeError(
            f"The type {type_name} is not available with Mistral."
        )

`format_str_model_input(model_input)`

Format a string input into a list of messages.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The input string prompt.	required

Returns:

Type	Description
`list`	A list of Mistral message objects.

Source code in outlines/models/mistral.py

@format_input.register(str)
def format_str_model_input(self, model_input: str) -> list:
    """Format a string input into a list of messages.

    Parameters
    ----------
    model_input : str
        The input string prompt.

    Returns
    -------
    list
        A list of Mistral message objects.

    """
    from mistralai import UserMessage

    return [UserMessage(content=model_input)]

`from_mistral(client, model_name=None, async_client=False)`

Create an Outlines Mistral model instance from a mistralai.Mistral client.

Parameters:

Name	Type	Description	Default
`client`	`Mistral`	A mistralai.Mistral client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`
`async_client`	`bool`	If True, return an AsyncMistral instance; otherwise, return a Mistral instance.	`False`

Returns:

Type	Description
`Union[Mistral, AsyncMistral]`	An Outlines Mistral or AsyncMistral model instance.

Source code in outlines/models/mistral.py

def from_mistral(
    client: "MistralClient",
    model_name: Optional[str] = None,
    async_client: bool = False,
) -> Union[Mistral, AsyncMistral]:
    """Create an Outlines Mistral model instance from a mistralai.Mistral
    client.

    Parameters
    ----------
    client : MistralClient
        A mistralai.Mistral client instance.
    model_name : Optional[str]
        The name of the model to use.
    async_client : bool
        If True, return an AsyncMistral instance;
        otherwise, return a Mistral instance.

    Returns
    -------
    Union[Mistral, AsyncMistral]
        An Outlines Mistral or AsyncMistral model instance.

    """
    from mistralai import Mistral as MistralClient

    if not isinstance(client, MistralClient):
        raise ValueError(
            "Invalid client type. The client must be an instance of "
            "`mistralai.Mistral`."
        )

    if async_client:
        return AsyncMistral(client, model_name)
    else:
        return Mistral(client, model_name)

`mlxlm`

Integration with the mlx_lm library.

Local runtime calls intentionally bypass outlines.exceptions.normalize_provider_errors().

`MLXLM`

Bases: Model

Thin wrapper around an mlx_lm model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the mlx_lm library.

Source code in outlines/models/mlxlm.py

class MLXLM(Model):
    """Thin wrapper around an `mlx_lm` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `mlx_lm` library.

    """

    tensor_library_name = "mlx"

    def __init__(
        self,
        model: "nn.Module",
        tokenizer: "MLXTokenizer",
    ):
        """
        Parameters
        ----------
        model
            An instance of an `mlx_lm` model.
        tokenizer
            An instance of an `mlx_lm` tokenizer or of a compatible
            `transformers` tokenizer.

        """
        self.model = model
        # self.mlx_tokenizer is used by the mlx-lm in its generate function
        self.mlx_tokenizer = tokenizer
        # self.tokenizer is used by the logits processor
        # tokenizer may be a mlx_lm.TokenizerWrapper (whose ._tokenizer is a
        # PreTrainedTokenizerFast) or a PreTrainedTokenizerFast passed directly
        inner = getattr(tokenizer, "_tokenizer", tokenizer)
        hf_tokenizer = inner if isinstance(inner, PreTrainedTokenizerBase) else tokenizer
        self.tokenizer = TransformerTokenizer(hf_tokenizer)
        self.type_adapter = MLXLMTypeAdapter(
            tokenizer=tokenizer,
            has_chat_template=_check_hf_chat_template(tokenizer)
        )

    def generate(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> str:
        """Generate text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        from mlx_lm import generate

        return generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )

    def generate_batch(
        self,
        model_input: list[str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> list[str]:
        """Generate a batch of text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The list of prompts based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        list[str]
            The list of text generated by the model.

        """
        from mlx_lm import batch_generate

        if output_type:
            raise NotImplementedError(
                "mlx-lm does not support constrained generation with batching."
                + "You cannot provide an `output_type` with this method."
            )

        model_input = [self.type_adapter.format_input(item) for item in model_input]

        # Contrarily to the other generate methods, batch_generate requires
        # tokenized prompts
        add_special_tokens = [
            (
                self.mlx_tokenizer.bos_token is None
                or not prompt.startswith(self.mlx_tokenizer.bos_token)
            )
            for prompt in model_input
        ]
        tokenized_model_input = [
            self.mlx_tokenizer.encode(
                model_input[i], add_special_tokens=add_special_tokens[i]
            )
            for i in range(len(model_input))
        ]

        response = batch_generate(
            self.model,
            self.mlx_tokenizer,
            tokenized_model_input,
            **kwargs,
        )

        return response.texts

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> Iterator[str]:
        """Stream text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        from mlx_lm import stream_generate

        for gen_response in stream_generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        ):
            yield gen_response.text

`init(model, tokenizer)`

Parameters:

Name	Type	Description	Default
`model`	`Module`	An instance of an `mlx_lm` model.	required
`tokenizer`	`MLXTokenizer`	An instance of an `mlx_lm` tokenizer or of a compatible `transformers` tokenizer.	required

Source code in outlines/models/mlxlm.py

def __init__(
    self,
    model: "nn.Module",
    tokenizer: "MLXTokenizer",
):
    """
    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        `transformers` tokenizer.

    """
    self.model = model
    # self.mlx_tokenizer is used by the mlx-lm in its generate function
    self.mlx_tokenizer = tokenizer
    # self.tokenizer is used by the logits processor
    # tokenizer may be a mlx_lm.TokenizerWrapper (whose ._tokenizer is a
    # PreTrainedTokenizerFast) or a PreTrainedTokenizerFast passed directly
    inner = getattr(tokenizer, "_tokenizer", tokenizer)
    hf_tokenizer = inner if isinstance(inner, PreTrainedTokenizerBase) else tokenizer
    self.tokenizer = TransformerTokenizer(hf_tokenizer)
    self.type_adapter = MLXLMTypeAdapter(
        tokenizer=tokenizer,
        has_chat_template=_check_hf_chat_template(tokenizer)
    )

`generate(model_input, output_type=None, **kwargs)`

Generate text using mlx-lm.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`kwargs`		Additional keyword arguments to pass to the `mlx-lm` library.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/mlxlm.py

def generate(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> str:
    """Generate text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    from mlx_lm import generate

    return generate(
        self.model,
        self.mlx_tokenizer,
        self.type_adapter.format_input(model_input),
        logits_processors=self.type_adapter.format_output_type(output_type),
        **kwargs,
    )

`generate_batch(model_input, output_type=None, **kwargs)`

Generate a batch of text using mlx-lm.

Parameters:

Name	Type	Description	Default
`model_input`	`list[str]`	The list of prompts based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`kwargs`		Additional keyword arguments to pass to the `mlx-lm` library.	`{}`

Returns:

Type	Description
`list[str]`	The list of text generated by the model.

Source code in outlines/models/mlxlm.py

def generate_batch(
    self,
    model_input: list[str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> list[str]:
    """Generate a batch of text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The list of prompts based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    list[str]
        The list of text generated by the model.

    """
    from mlx_lm import batch_generate

    if output_type:
        raise NotImplementedError(
            "mlx-lm does not support constrained generation with batching."
            + "You cannot provide an `output_type` with this method."
        )

    model_input = [self.type_adapter.format_input(item) for item in model_input]

    # Contrarily to the other generate methods, batch_generate requires
    # tokenized prompts
    add_special_tokens = [
        (
            self.mlx_tokenizer.bos_token is None
            or not prompt.startswith(self.mlx_tokenizer.bos_token)
        )
        for prompt in model_input
    ]
    tokenized_model_input = [
        self.mlx_tokenizer.encode(
            model_input[i], add_special_tokens=add_special_tokens[i]
        )
        for i in range(len(model_input))
    ]

    response = batch_generate(
        self.model,
        self.mlx_tokenizer,
        tokenized_model_input,
        **kwargs,
    )

    return response.texts

`generate_stream(model_input, output_type=None, **kwargs)`

Stream text using mlx-lm.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`kwargs`		Additional keyword arguments to pass to the `mlx-lm` library.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/mlxlm.py

def generate_stream(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> Iterator[str]:
    """Stream text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    from mlx_lm import stream_generate

    for gen_response in stream_generate(
        self.model,
        self.mlx_tokenizer,
        self.type_adapter.format_input(model_input),
        logits_processors=self.type_adapter.format_output_type(output_type),
        **kwargs,
    ):
        yield gen_response.text

`MLXLMTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the MLXLM model.

Source code in outlines/models/mlxlm.py

class MLXLMTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `MLXLM` model."""

    def __init__(self, tokenizer: "PreTrainedTokenizer", has_chat_template: bool = False):
        self.tokenizer = tokenizer
        self.has_chat_template = has_chat_template

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {type(model_input)} is not available with "
            "mlx-lm. The available types are `str` and `Chat`."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        if self.has_chat_template:
            return self.format_chat_input(Chat([{"role": "user", "content": model_input}]))
        return model_input

    @format_input.register(Chat)
    def format_chat_input(self, model_input: Chat) -> str:
        if not all(
            isinstance(message["content"], str)
            for message in model_input.messages
        ):
            raise ValueError(
                "mlx-lm does not support multi-modal messages."
                + "The content of each message must be a string."
            )

        return self.tokenizer.apply_chat_template(
            model_input.messages,
            tokenize=False,
            add_generation_prompt=True,
        )

    def format_output_type(
        self, output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional[List[OutlinesLogitsProcessor]]:
        """Generate the logits processor argument to pass to the model.

        Parameters
        ----------
        output_type
            The logits processor provided.

        Returns
        -------
        Optional[list[OutlinesLogitsProcessor]]
            The logits processor argument to be passed to the model.

        """
        if not output_type:
            return None
        return [output_type]

`format_input(model_input)`

Generate the prompt argument to pass to the model.

Parameters:

Name	Type	Description	Default
`model_input`		The input provided by the user.	required

Returns:

Type	Description
`str`	The formatted input to be passed to the model.

Source code in outlines/models/mlxlm.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {type(model_input)} is not available with "
        "mlx-lm. The available types are `str` and `Chat`."
    )

`format_output_type(output_type=None)`

Generate the logits processor argument to pass to the model.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor provided.	`None`

Returns:

Type	Description
`Optional[list[OutlinesLogitsProcessor]]`	The logits processor argument to be passed to the model.

Source code in outlines/models/mlxlm.py

def format_output_type(
    self, output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional[List[OutlinesLogitsProcessor]]:
    """Generate the logits processor argument to pass to the model.

    Parameters
    ----------
    output_type
        The logits processor provided.

    Returns
    -------
    Optional[list[OutlinesLogitsProcessor]]
        The logits processor argument to be passed to the model.

    """
    if not output_type:
        return None
    return [output_type]

`from_mlxlm(model, tokenizer)`

Create an Outlines MLXLM model instance from an mlx_lm model and a tokenizer.

Parameters:

Name	Type	Description	Default
`model`	`Module`	An instance of an `mlx_lm` model.	required
`tokenizer`	`MLXTokenizer`	An instance of an `mlx_lm` tokenizer or of a compatible transformers tokenizer.	required

Returns:

Type	Description
`MLXLM`	An Outlines `MLXLM` model instance.

Source code in outlines/models/mlxlm.py

def from_mlxlm(model: "nn.Module", tokenizer: "MLXTokenizer") -> MLXLM:
    """Create an Outlines `MLXLM` model instance from an `mlx_lm` model and a
    tokenizer.

    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        transformers tokenizer.

    Returns
    -------
    MLXLM
        An Outlines `MLXLM` model instance.

    """
    return MLXLM(model, tokenizer)

`ollama`

Integration with the ollama library.

`AsyncOllama`

Bases: AsyncModel

Thin wrapper around the ollama.AsyncClient client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.AsyncClient client.

Source code in outlines/models/ollama.py

class AsyncOllama(AsyncModel):
    """Thin wrapper around the `ollama.AsyncClient` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `ollama.AsyncClient` client.

    """

    def __init__(
        self,client: "AsyncClient", model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `ollama.Client` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OllamaTypeAdapter()

    async def generate(self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            response = await self.client.chat(
                messages=self.type_adapter.format_input(model_input),
                format=self.type_adapter.format_output_type(output_type),
                **kwargs,
            )
        return response.message.content

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `ollama` library does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.chat(
                messages=self.type_adapter.format_input(model_input),
                format=self.type_adapter.format_output_type(output_type),
                stream=True,
                **kwargs,
            )
            async for chunk in stream:
                yield chunk.message.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`AsyncClient`	The `ollama.Client` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/ollama.py

def __init__(
    self,client: "AsyncClient", model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `ollama.Client` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OllamaTypeAdapter()

`generate(model_input, output_type=None, **kwargs)` `async`

Generate text using Ollama.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/ollama.py

async def generate(self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        response = await self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )
    return response.message.content

`generate_stream(model_input, output_type=None, **kwargs)` `async`

Stream text using Ollama.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py

async def generate_stream( # type: ignore
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            stream=True,
            **kwargs,
        )
        async for chunk in stream:
            yield chunk.message.content

`Ollama`

Bases: Model

Thin wrapper around the ollama.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.Client client.

Source code in outlines/models/ollama.py

class Ollama(Model):
    """Thin wrapper around the `ollama.Client` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `ollama.Client` client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            The `ollama.Client` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OllamaTypeAdapter()

    def generate(self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        print(self.type_adapter.format_input(model_input))

        with normalize_provider_errors(PROVIDER):
            response = self.client.chat(
                messages=self.type_adapter.format_input(model_input),
                format=self.type_adapter.format_output_type(output_type),
                **kwargs,
            )

        return response.message.content

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `ollama` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            response = self.client.chat(
                messages=self.type_adapter.format_input(model_input),
                format=self.type_adapter.format_output_type(output_type),
                stream=True,
                **kwargs,
            )
            for chunk in response:
                yield chunk.message.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Client`	The `ollama.Client` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/ollama.py

def __init__(self, client: "Client", model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        The `ollama.Client` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OllamaTypeAdapter()

`generate(model_input, output_type=None, **kwargs)`

Generate text using Ollama.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/ollama.py

def generate(self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    print(self.type_adapter.format_input(model_input))

    with normalize_provider_errors(PROVIDER):
        response = self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )

    return response.message.content

`generate_stream(model_input, output_type=None, **kwargs)`

Stream text using Ollama.

Parameters:

Name	Type	Description	Default
`model_input`	`Chat \| str \| list`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.	`None`
`**kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py

def generate_stream(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> Iterator[str]:
    """Stream text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        response = self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            stream=True,
            **kwargs,
        )
        for chunk in response:
            yield chunk.message.content

`OllamaTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the Ollama model.

Source code in outlines/models/ollama.py

class OllamaTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Ollama` model."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the value of the `messages` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        list
            The formatted value of the `messages` argument to be passed to
            the client.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "Ollama. The only available types are `str`, `list` and `Chat`."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user only passes a prompt.

        """
        return [
            self._create_message("user", model_input)
        ]

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user passes a prompt and images.

        """
        return [
            self._create_message("user", model_input)
        ]

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user passes a Chat instance.

        """
        return [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]

    def _create_message(self, role: str, content: str | list) -> dict:
        """Create a message."""

        if isinstance(content, str):
            return {
                "role": role,
                "content": content,
            }

        elif isinstance(content, list):
            prompt = content[0]
            images = content[1:]

            if not all(isinstance(image, Image) for image in images):
                raise ValueError("All assets provided must be of type Image")

            return {
                "role": role,
                "content": prompt,
                "images": [image.image_str for image in images],
            }

        else:
            raise ValueError(
                f"Invalid content type: {type(content)}. "
                "The content must be a string or a list containing a string "
                "and a list of images."
            )

    def format_output_type(
        self, output_type: Optional[Any] = None
    ) -> Optional[dict]:
        """Format the output type to pass to the client.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        Optional[str]
            The formatted output type to be passed to the model.

        """
        if output_type is None:
            return None
        elif isinstance(output_type, Regex):
            raise TypeError(
                "Regex-based structured outputs are not supported by Ollama. "
                "Use an open source model in the meantime."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not supported by Ollama. "
                "Use an open source model in the meantime."
            )
        elif JsonSchema.is_json_schema(output_type):
            return cast(dict, JsonSchema.convert_to(output_type, ["dict"]))
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not supported by Ollama. "
                "Consider using a local model instead."
            )

`format_chat_model_input(model_input)`

Generate the value of the messages argument to pass to the client when the user passes a Chat instance.

Source code in outlines/models/ollama.py

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a Chat instance.

    """
    return [
        self._create_message(message["role"], message["content"])
        for message in model_input.messages
    ]

`format_input(model_input)`

Generate the value of the messages argument to pass to the client.

Parameters:

Name	Type	Description	Default
`model_input`		The input provided by the user.	required

Returns:

Type	Description
`list`	The formatted value of the `messages` argument to be passed to the client.

Source code in outlines/models/ollama.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the value of the `messages` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    list
        The formatted value of the `messages` argument to be passed to
        the client.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "Ollama. The only available types are `str`, `list` and `Chat`."
    )

`format_list_model_input(model_input)`

Generate the value of the messages argument to pass to the client when the user passes a prompt and images.

Source code in outlines/models/ollama.py

@format_input.register(list)
def format_list_model_input(self, model_input: list) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a prompt and images.

    """
    return [
        self._create_message("user", model_input)
    ]

`format_output_type(output_type=None)`

Format the output type to pass to the client.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`

Returns:

Type	Description
`Optional[str]`	The formatted output type to be passed to the model.

Source code in outlines/models/ollama.py

def format_output_type(
    self, output_type: Optional[Any] = None
) -> Optional[dict]:
    """Format the output type to pass to the client.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    Optional[str]
        The formatted output type to be passed to the model.

    """
    if output_type is None:
        return None
    elif isinstance(output_type, Regex):
        raise TypeError(
            "Regex-based structured outputs are not supported by Ollama. "
            "Use an open source model in the meantime."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not supported by Ollama. "
            "Use an open source model in the meantime."
        )
    elif JsonSchema.is_json_schema(output_type):
        return cast(dict, JsonSchema.convert_to(output_type, ["dict"]))
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not supported by Ollama. "
            "Consider using a local model instead."
        )

`format_str_model_input(model_input)`

Generate the value of the messages argument to pass to the client when the user only passes a prompt.

Source code in outlines/models/ollama.py

@format_input.register(str)
def format_str_model_input(self, model_input: str) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user only passes a prompt.

    """
    return [
        self._create_message("user", model_input)
    ]

`from_ollama(client, model_name=None)`

Create an Outlines Ollama model instance from an ollama.Client or ollama.AsyncClient instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[Client, AsyncClient]`	A `ollama.Client` or `ollama.AsyncClient` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Union[Ollama, AsyncOllama]`	An Outlines `Ollama` or `AsyncOllama` model instance.

Source code in outlines/models/ollama.py

def from_ollama(
    client: Union["Client", "AsyncClient"], model_name: Optional[str] = None
) -> Union[Ollama, AsyncOllama]:
    """Create an Outlines `Ollama` model instance from an `ollama.Client`
    or `ollama.AsyncClient` instance.

    Parameters
    ----------
    client
        A `ollama.Client` or `ollama.AsyncClient` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[Ollama, AsyncOllama]
        An Outlines `Ollama` or `AsyncOllama` model instance.

    """
    from ollama import AsyncClient, Client

    if isinstance(client, Client):
        return Ollama(client, model_name)
    elif isinstance(client, AsyncClient):
        return AsyncOllama(client, model_name)
    else:
        raise ValueError(
            "Invalid client type, the client must be an instance of "
            "`ollama.Client` or `ollama.AsyncClient`."
        )

`openai`

Integration with OpenAI's API.

`AsyncOpenAI`

Bases: AsyncModel

Thin wrapper around the openai.AsyncOpenAI client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.AsyncOpenAI client.

Source code in outlines/models/openai.py

class AsyncOpenAI(AsyncModel):
    """Thin wrapper around the `openai.AsyncOpenAI` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.AsyncOpenAI` client.

    """

    def __init__(
        self,
        client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"],
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OpenAITypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            result = await self.client.chat.completions.create(
                messages=messages,
                **response_format,
                **inference_kwargs,
            )

        messages = [choice.message for choice in result.choices]
        for message in messages:
            if message.refusal is not None:
                raise GenerationError(
                    f"OpenAI refused to answer the request: {message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The `openai` library does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs,
    ) -> AsyncIterator[str]:
        """Stream text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.chat.completions.create(
                stream=True,
                messages=messages,
                **response_format,
                **inference_kwargs
            )
            async for chunk in stream:
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Union[AsyncOpenAI, AsyncAzureOpenAI]`	The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/openai.py

def __init__(
    self,
    client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"],
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OpenAITypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Union[type[BaseModel], str]]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/openai.py

async def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        result = await self.client.chat.completions.create(
            messages=messages,
            **response_format,
            **inference_kwargs,
        )

    messages = [choice.message for choice in result.choices]
    for message in messages:
        if message.refusal is not None:
            raise GenerationError(
                f"OpenAI refused to answer the request: {message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Stream text using OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Union[type[BaseModel], str]]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/openai.py

async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs,
) -> AsyncIterator[str]:
    """Stream text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.chat.completions.create(
            stream=True,
            messages=messages,
            **response_format,
            **inference_kwargs
        )
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`OpenAI`

Bases: Model

Thin wrapper around the openai.OpenAI client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client.

Source code in outlines/models/openai.py

class OpenAI(Model):
    """Thin wrapper around the `openai.OpenAI` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client.

    """

    def __init__(
        self,
        client: Union["OpenAIClient", "AzureOpenAIClient"],
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `openai.OpenAI` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OpenAITypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            result = self.client.chat.completions.create(
                messages=messages,
                **response_format,
                **inference_kwargs,
            )

        messages = [choice.message for choice in result.choices]
        for message in messages:
            if message.refusal is not None:
                raise GenerationError(
                    f"OpenAI refused to answer the request: {message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The `openai` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Stream text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        with normalize_provider_errors(PROVIDER):
            stream = self.client.chat.completions.create(
                stream=True,
                messages=messages,
                **response_format,
                **inference_kwargs
            )
            for chunk in stream:
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`Union[OpenAI, AzureOpenAI]`	The `openai.OpenAI` client.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/openai.py

def __init__(
    self,
    client: Union["OpenAIClient", "AzureOpenAIClient"],
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `openai.OpenAI` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OpenAITypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Union[type[BaseModel], str]]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.	`None`
`**inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/openai.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        result = self.client.chat.completions.create(
            messages=messages,
            **response_format,
            **inference_kwargs,
        )

    messages = [choice.message for choice in result.choices]
    for message in messages:
        if message.refusal is not None:
            raise GenerationError(
                f"OpenAI refused to answer the request: {message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Union[type[BaseModel], str]]`	The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.	`None`
`**inference_kwargs`		Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/openai.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Stream text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    with normalize_provider_errors(PROVIDER):
        stream = self.client.chat.completions.create(
            stream=True,
            messages=messages,
            **response_format,
            **inference_kwargs
        )
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`OpenAITypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the OpenAI model.

OpenAITypeAdapter is responsible for preparing the arguments to OpenAI's completions.create methods: the input (prompt and possibly image), as well as the output type (only JSON).

Source code in outlines/models/openai.py

class OpenAITypeAdapter(ModelTypeAdapter):
    """Type adapter for the `OpenAI` model.

    `OpenAITypeAdapter` is responsible for preparing the arguments to OpenAI's
    `completions.create` methods: the input (prompt and possibly image), as
    well as the output type (only JSON).

    """

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the `messages` argument to pass to the client.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        dict
            The formatted input to be passed to the client.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "OpenAI. The only available types are `str`, `list` and `Chat`."
        )

    @format_input.register(str)
    def format_str_model_input(self, model_input: str) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user only passes a prompt.

        """
        return [
            self._create_message("user", model_input)
        ]

    @format_input.register(list)
    def format_list_model_input(self, model_input: list) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user passes a prompt and images.

        """
        return [
            self._create_message("user", model_input)
        ]

    @format_input.register(Chat)
    def format_chat_model_input(self, model_input: Chat) -> list:
        """Generate the value of the `messages` argument to pass to the
        client when the user passes a Chat instance.

        """
        return [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]

    def _create_message(self, role: str, content: str | list) -> dict:
        """Create a message."""

        if isinstance(content, str):
            return {
                "role": role,
                "content": content,
            }

        elif isinstance(content, list):
            prompt = content[0]
            images = content[1:]

            if not all(isinstance(image, Image) for image in images):
                raise ValueError("All assets provided must be of type Image")

            image_parts = [
                self._create_img_content(image)
                for image in images
            ]

            return {
                "role": role,
                "content": [
                    {"type": "text", "text": prompt},
                    *image_parts,
                ],
            }

        else:
            raise ValueError(
                f"Invalid content type: {type(content)}. "
                "The content must be a string or a list containing a string "
                "and a list of images."
            )

    def _create_img_content(self, image: Image) -> dict:
        """Create the content for an image input."""
        return {
            "type": "image_url",
            "image_url": {
                "url": f"data:{image.image_format};base64,{image.image_str}"  # noqa: E702
            },
        }

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the `response_format` argument to the client based on the
        output type specified by the user.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        dict
            The formatted output type to be passed to the client.

        """
        # Unsupported languages
        if isinstance(output_type, Regex):
            raise TypeError(
                "Neither regex-based structured outputs nor the `pattern` keyword "
                "in Json Schema are available with OpenAI. Use an open source "
                "model or dottxt instead."
            )
        elif isinstance(output_type, CFG):
            raise TypeError(
                "CFG-based structured outputs are not available with OpenAI. "
                "Use an open source model or dottxt instead."
            )

        if output_type is None:
            return {}
        elif is_native_dict(output_type):
            return self.format_json_mode_type()
        elif JsonSchema.is_json_schema(output_type):
            return self.format_json_output_type(
                cast(dict, JsonSchema.convert_to(output_type, ["dict"]))
            )
        else:
            type_name = getattr(output_type, "__name__", output_type)
            raise TypeError(
                f"The type `{type_name}` is not available with OpenAI. "
                "Use an open source model or dottxt instead."
            )

    def format_json_output_type(self, schema: dict) -> dict:
        """Generate the `response_format` argument to the client when the user
        specified a `Json` output type.

        """
        # OpenAI requires `additionalProperties` to be set to False
        schema = set_additional_properties_false_json_schema(schema)

        return {
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "default",
                    "strict": True,
                    "schema": schema,
                },
            }
        }

    def format_json_mode_type(self) -> dict:
        """Generate the `response_format` argument to the client when the user
        specified the output type should be a JSON but without specifying the
        schema (also called "JSON mode").

        """
        return {"response_format": {"type": "json_object"}}

`format_chat_model_input(model_input)`

Generate the value of the messages argument to pass to the client when the user passes a Chat instance.

Source code in outlines/models/openai.py

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a Chat instance.

    """
    return [
        self._create_message(message["role"], message["content"])
        for message in model_input.messages
    ]

`format_input(model_input)`

Generate the messages argument to pass to the client.

Parameters:

Name	Type	Description	Default
`model_input`		The input provided by the user.	required

Returns:

Type	Description
`dict`	The formatted input to be passed to the client.

Source code in outlines/models/openai.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the `messages` argument to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    dict
        The formatted input to be passed to the client.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "OpenAI. The only available types are `str`, `list` and `Chat`."
    )

`format_json_mode_type()`

Generate the response_format argument to the client when the user specified the output type should be a JSON but without specifying the schema (also called "JSON mode").

Source code in outlines/models/openai.py

def format_json_mode_type(self) -> dict:
    """Generate the `response_format` argument to the client when the user
    specified the output type should be a JSON but without specifying the
    schema (also called "JSON mode").

    """
    return {"response_format": {"type": "json_object"}}

`format_json_output_type(schema)`

Generate the response_format argument to the client when the user specified a Json output type.

Source code in outlines/models/openai.py

def format_json_output_type(self, schema: dict) -> dict:
    """Generate the `response_format` argument to the client when the user
    specified a `Json` output type.

    """
    # OpenAI requires `additionalProperties` to be set to False
    schema = set_additional_properties_false_json_schema(schema)

    return {
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "name": "default",
                "strict": True,
                "schema": schema,
            },
        }
    }

`format_list_model_input(model_input)`

Generate the value of the messages argument to pass to the client when the user passes a prompt and images.

Source code in outlines/models/openai.py

@format_input.register(list)
def format_list_model_input(self, model_input: list) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a prompt and images.

    """
    return [
        self._create_message("user", model_input)
    ]

`format_output_type(output_type=None)`

Generate the response_format argument to the client based on the output type specified by the user.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The output type provided by the user.	`None`

Returns:

Type	Description
`dict`	The formatted output type to be passed to the client.

Source code in outlines/models/openai.py

def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the `response_format` argument to the client based on the
    output type specified by the user.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    dict
        The formatted output type to be passed to the client.

    """
    # Unsupported languages
    if isinstance(output_type, Regex):
        raise TypeError(
            "Neither regex-based structured outputs nor the `pattern` keyword "
            "in Json Schema are available with OpenAI. Use an open source "
            "model or dottxt instead."
        )
    elif isinstance(output_type, CFG):
        raise TypeError(
            "CFG-based structured outputs are not available with OpenAI. "
            "Use an open source model or dottxt instead."
        )

    if output_type is None:
        return {}
    elif is_native_dict(output_type):
        return self.format_json_mode_type()
    elif JsonSchema.is_json_schema(output_type):
        return self.format_json_output_type(
            cast(dict, JsonSchema.convert_to(output_type, ["dict"]))
        )
    else:
        type_name = getattr(output_type, "__name__", output_type)
        raise TypeError(
            f"The type `{type_name}` is not available with OpenAI. "
            "Use an open source model or dottxt instead."
        )

`format_str_model_input(model_input)`

Generate the value of the messages argument to pass to the client when the user only passes a prompt.

Source code in outlines/models/openai.py

@format_input.register(str)
def format_str_model_input(self, model_input: str) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user only passes a prompt.

    """
    return [
        self._create_message("user", model_input)
    ]

`from_openai(client, model_name=None)`

Create an Outlines OpenAI or AsyncOpenAI model instance from an openai.OpenAI or openai.AsyncOpenAI client.

Parameters:

Name	Type	Description	Default
`client`	`Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]`	An `openai.OpenAI`, `openai.AsyncOpenAI`, `openai.AzureOpenAI` or `openai.AsyncAzureOpenAI` client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`OpenAI`	An Outlines `OpenAI` or `AsyncOpenAI` model instance.

Source code in outlines/models/openai.py

def from_openai(
    client: Union[
        "OpenAIClient",
        "AsyncOpenAIClient",
        "AzureOpenAIClient",
        "AsyncAzureOpenAIClient",
    ],
    model_name: Optional[str] = None,
) -> Union[OpenAI, AsyncOpenAI]:
    """Create an Outlines `OpenAI` or `AsyncOpenAI` model instance from an
    `openai.OpenAI` or `openai.AsyncOpenAI` client.

    Parameters
    ----------
    client
        An `openai.OpenAI`, `openai.AsyncOpenAI`, `openai.AzureOpenAI` or
        `openai.AsyncAzureOpenAI` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    OpenAI
        An Outlines `OpenAI` or `AsyncOpenAI` model instance.

    """
    import openai

    if isinstance(client, openai.OpenAI):
        return OpenAI(client, model_name)
    elif isinstance(client, openai.AsyncOpenAI):
        return AsyncOpenAI(client, model_name)
    else:
        raise ValueError(
            "Invalid client type. The client must be an instance of "
            "+ `openai.OpenAI` or `openai.AsyncOpenAI`."
        )

`sglang`

Integration with an SGLang server.

`AsyncSGLang`

Bases: AsyncModel

Thin async wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py

class AsyncSGLang(AsyncModel):
    """Thin async wrapper around the `openai.OpenAI` client used to communicate
    with an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.
        model_name
            The name of the model to use.

        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using `sglang`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = await self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise GenerationError(
                    f"The SGLang server refused to answer the request: "
                    f"{message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "SGLang does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Return a text generator.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.chat.completions.create(
                **client_args,
                stream=True,
            )
            async for chunk in stream:  # pragma: no cover
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }

        return client_args

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`		An `openai.AsyncOpenAI` client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Parameters:

Name	Type	Description	Default
`client`		An `openai.AsyncOpenAI` client instance.	required

Source code in outlines/models/sglang.py

def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.
    model_name
        The name of the model to use.

    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = SGLangTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using sglang.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/sglang.py

async def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using `sglang`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = await self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise GenerationError(
                f"The SGLang server refused to answer the request: "
                f"{message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Return a text generator.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`AsyncIterator[str]`	An async iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py

async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Return a text generator.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )
        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`SGLang`

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py

class SGLang(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using SGLang.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise GenerationError(
                    f"The SGLang server refused to answer the request: "
                    f"{message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "SGLang does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using SGLang.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = self.client.chat.completions.create(
                **client_args, stream=True,
            )
            for chunk in stream:  # pragma: no cover
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }

        return client_args

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`		An `openai.OpenAI` client instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Source code in outlines/models/sglang.py

def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = SGLangTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using SGLang.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/sglang.py

def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using SGLang.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise GenerationError(
                f"The SGLang server refused to answer the request: "
                f"{message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using SGLang.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py

def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using SGLang.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = self.client.chat.completions.create(
            **client_args, stream=True,
        )
        for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`SGLangTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the SGLang and AsyncSGLang models.

Source code in outlines/models/sglang.py

class SGLangTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `SGLang` and `AsyncSGLang` models."""

    def format_input(self, model_input: Union[Chat, list, str]) -> list:
        """Generate the value of the messages argument to pass to the client.

        We rely on the OpenAITypeAdapter to format the input as the sglang
        server expects input in the same format as OpenAI.

        Parameters
        ----------
        model_input
            The input passed by the user.

        Returns
        -------
        list
            The formatted input to be passed to the client.

        """
        return OpenAITypeAdapter().format_input(model_input)

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the client.

        Parameters
        ----------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The formatted output type to be passed to the client.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            warnings.warn(
                "SGLang grammar-based structured outputs expects an EBNF "
                "grammar instead of a Lark grammar as is generally used in "
                "Outlines. The grammar cannot be used as a structured output "
                "type with an outlines backend, it is only compatible with "
                "the sglang and llguidance backends."
            )
            return {"extra_body": {"ebnf": term.definition}}
        elif isinstance(term, JsonSchema):
            return OpenAITypeAdapter().format_json_output_type(
                json.loads(term.schema)
            )
        else:
            return {"extra_body": {"regex": to_regex(term)}}

`format_input(model_input)`

Generate the value of the messages argument to pass to the client.

We rely on the OpenAITypeAdapter to format the input as the sglang server expects input in the same format as OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, list, str]`	The input passed by the user.	required

Returns:

Type	Description
`list`	The formatted input to be passed to the client.

Source code in outlines/models/sglang.py

def format_input(self, model_input: Union[Chat, list, str]) -> list:
    """Generate the value of the messages argument to pass to the client.

    We rely on the OpenAITypeAdapter to format the input as the sglang
    server expects input in the same format as OpenAI.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    list
        The formatted input to be passed to the client.

    """
    return OpenAITypeAdapter().format_input(model_input)

`format_output_type(output_type=None)`

Generate the structured output argument to pass to the client.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The structured output type provided.	`None`

Returns:

Type	Description
`dict`	The formatted output type to be passed to the client.

Source code in outlines/models/sglang.py

def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the client.

    Parameters
    ----------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The formatted output type to be passed to the client.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        warnings.warn(
            "SGLang grammar-based structured outputs expects an EBNF "
            "grammar instead of a Lark grammar as is generally used in "
            "Outlines. The grammar cannot be used as a structured output "
            "type with an outlines backend, it is only compatible with "
            "the sglang and llguidance backends."
        )
        return {"extra_body": {"ebnf": term.definition}}
    elif isinstance(term, JsonSchema):
        return OpenAITypeAdapter().format_json_output_type(
            json.loads(term.schema)
        )
    else:
        return {"extra_body": {"regex": to_regex(term)}}

`from_sglang(client, model_name=None)`

Create a SGLang or AsyncSGLang instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[OpenAI, AsyncOpenAI]`	An `openai.OpenAI` or `openai.AsyncOpenAI` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Union[SGLang, AsyncSGLang]`	An Outlines `SGLang` or `AsyncSGLang` model instance.

Source code in outlines/models/sglang.py

def from_sglang(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[SGLang, AsyncSGLang]:
    """Create a `SGLang` or `AsyncSGLang` instance from an `openai.OpenAI` or
    `openai.AsyncOpenAI` instance.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[SGLang, AsyncSGLang]
        An Outlines `SGLang` or `AsyncSGLang` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    if isinstance(client, OpenAI):
        return SGLang(client, model_name)
    elif isinstance(client, AsyncOpenAI):
        return AsyncSGLang(client, model_name)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            "Please provide an OpenAI or AsyncOpenAI instance."
        )

`tgi`

Integration with a TGI server.

`AsyncTGI`

Bases: AsyncModel

Thin async wrapper around a huggingface_hub.AsyncInferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.AsyncInferenceClient client.

Source code in outlines/models/tgi.py

class AsyncTGI(AsyncModel):
    """Thin async wrapper around a `huggingface_hub.AsyncInferenceClient`
    client used to communicate with a `TGI` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the
    `huggingface_hub.AsyncInferenceClient` client.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `AsyncInferenceClient` client instance.

        """
        self.client = client
        self.type_adapter = TGITypeAdapter()

    async def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = await self.client.text_generation(**client_args)

        return response

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("TGI does not support batch inference.")

    async def generate_stream( # type: ignore
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.text_generation(
                **client_args, stream=True
            )
            async for chunk in stream:  # pragma: no cover
                yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the TGI client."""
        prompt = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        client_args = {
            "prompt": prompt,
            **inference_kwargs,
        }

        return client_args

`init(client)`

Parameters:

Name	Type	Description	Default
`client`		A huggingface `AsyncInferenceClient` client instance.	required

Source code in outlines/models/tgi.py

def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `AsyncInferenceClient` client instance.

    """
    self.client = client
    self.type_adapter = TGITypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using TGI.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/tgi.py

async def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = await self.client.text_generation(**client_args)

    return response

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Stream text using TGI.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`AsyncIterator[str]`	An async iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py

async def generate_stream( # type: ignore
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.text_generation(
            **client_args, stream=True
        )
        async for chunk in stream:  # pragma: no cover
            yield chunk

`TGI`

Bases: Model

Thin wrapper around a huggingface_hub.InferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.InferenceClient client.

Source code in outlines/models/tgi.py

class TGI(Model):
    """Thin wrapper around a `huggingface_hub.InferenceClient` client used to
    communicate with a `TGI` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the
    `huggingface_hub.InferenceClient` client.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `InferenceClient` client instance.

        """
        self.client = client
        self.type_adapter = TGITypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            return self.client.text_generation(**client_args)

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("TGI does not support batch inference.")

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = self.client.text_generation(
                **client_args, stream=True,
            )
            for chunk in stream:  # pragma: no cover
                yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the TGI client."""
        prompt = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        client_args = {
            "prompt": prompt,
            **inference_kwargs,
        }

        return client_args

`init(client)`

Parameters:

Name	Type	Description	Default
`client`		A huggingface `InferenceClient` client instance.	required

Source code in outlines/models/tgi.py

def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `InferenceClient` client instance.

    """
    self.client = client
    self.type_adapter = TGITypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using TGI.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`str`	The text generated by the model.

Source code in outlines/models/tgi.py

def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        return self.client.text_generation(**client_args)

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using TGI.

Parameters:

Name	Type	Description	Default
`model_input`	`str`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py

def generate_stream(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = self.client.text_generation(
            **client_args, stream=True,
        )
        for chunk in stream:  # pragma: no cover
            yield chunk

`TGITypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the TGI and AsyncTGI models.

Source code in outlines/models/tgi.py

class TGITypeAdapter(ModelTypeAdapter):
    """Type adapter for the `TGI` and `AsyncTGI` models."""

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the client.

        Argument
        --------
        model_input
            The input passed by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise NotImplementedError(
            f"The input type {input} is not available with TGI. "
            + "The only available type is `str`."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        return model_input

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the client.

        Argument
        --------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The structured output argument to pass to the client.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            raise NotImplementedError(
                "TGI does not support CFG-based structured outputs."
            )
        elif isinstance(term, JsonSchema):
            return {
                "grammar": {
                    "type": "json",
                    "value": json.loads(term.schema),
                }
            }
        else:
            return {
                "grammar": {
                    "type": "regex",
                    "value": to_regex(term),
                }
            }

`format_input(model_input)`

Generate the prompt argument to pass to the client.

Argument

model_input The input passed by the user.

Returns:

Type	Description
`str`	The formatted input to be passed to the model.

Source code in outlines/models/tgi.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the client.

    Argument
    --------
    model_input
        The input passed by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise NotImplementedError(
        f"The input type {input} is not available with TGI. "
        + "The only available type is `str`."
    )

`format_output_type(output_type=None)`

Generate the structured output argument to pass to the client.

Argument

output_type The structured output type provided.

Returns:

Type	Description
`dict`	The structured output argument to pass to the client.

Source code in outlines/models/tgi.py

def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the client.

    Argument
    --------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The structured output argument to pass to the client.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        raise NotImplementedError(
            "TGI does not support CFG-based structured outputs."
        )
    elif isinstance(term, JsonSchema):
        return {
            "grammar": {
                "type": "json",
                "value": json.loads(term.schema),
            }
        }
    else:
        return {
            "grammar": {
                "type": "regex",
                "value": to_regex(term),
            }
        }

`from_tgi(client)`

Create an Outlines TGI or AsyncTGI model instance from an huggingface_hub.InferenceClient or huggingface_hub.AsyncInferenceClient instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[InferenceClient, AsyncInferenceClient]`	An `huggingface_hub.InferenceClient` or `huggingface_hub.AsyncInferenceClient` instance.	required

Returns:

Type	Description
`Union[TGI, AsyncTGI]`	An Outlines `TGI` or `AsyncTGI` model instance.

Source code in outlines/models/tgi.py

def from_tgi(
    client: Union["InferenceClient", "AsyncInferenceClient"],
) -> Union[TGI, AsyncTGI]:
    """Create an Outlines `TGI` or `AsyncTGI` model instance from an
    `huggingface_hub.InferenceClient` or `huggingface_hub.AsyncInferenceClient`
    instance.

    Parameters
    ----------
    client
        An `huggingface_hub.InferenceClient` or
        `huggingface_hub.AsyncInferenceClient` instance.

    Returns
    -------
    Union[TGI, AsyncTGI]
        An Outlines `TGI` or `AsyncTGI` model instance.

    """
    from huggingface_hub import AsyncInferenceClient, InferenceClient

    if isinstance(client, InferenceClient):
        return TGI(client)
    elif isinstance(client, AsyncInferenceClient):
        return AsyncTGI(client)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            + "Please provide an HuggingFace InferenceClient "
            + "or AsyncInferenceClient instance."
        )

`tokenizer`

`Tokenizer`

Bases: Hashable, Protocol

Source code in outlines/models/tokenizer.py

class Tokenizer(Hashable, Protocol):
    eos_token: str
    eos_token_id: int
    pad_token_id: int
    vocabulary: Dict[str, int]
    special_tokens: Set[str]

    def encode(
        self, prompt: Union[str, List[str]]
    ) -> "Tuple['NDArray[np.int64]', 'NDArray[np.int64]']":
        """Translate the input prompts into arrays of token ids and attention mask."""
        ...

    def decode(self, token_ids: "NDArray[np.int64]") -> List[str]:
        """Translate an array of token ids to a string or list of strings."""
        ...

    def convert_token_to_string(self, token: str) -> str:
        """Convert a token to its equivalent string.

        This is for instance useful for BPE tokenizers where whitespaces are
        represented by the special characted `Ġ`. This prevents matching a raw
        token that includes `Ġ` with a string.
        """
        ...

`convert_token_to_string(token)`

Convert a token to its equivalent string.

This is for instance useful for BPE tokenizers where whitespaces are represented by the special characted Ġ. This prevents matching a raw token that includes Ġ with a string.

Source code in outlines/models/tokenizer.py

def convert_token_to_string(self, token: str) -> str:
    """Convert a token to its equivalent string.

    This is for instance useful for BPE tokenizers where whitespaces are
    represented by the special characted `Ġ`. This prevents matching a raw
    token that includes `Ġ` with a string.
    """
    ...

`decode(token_ids)`

Translate an array of token ids to a string or list of strings.

Source code in outlines/models/tokenizer.py

23
24
25

def decode(self, token_ids: "NDArray[np.int64]") -> List[str]:
    """Translate an array of token ids to a string or list of strings."""
    ...

`encode(prompt)`

Translate the input prompts into arrays of token ids and attention mask.

Source code in outlines/models/tokenizer.py

def encode(
    self, prompt: Union[str, List[str]]
) -> "Tuple['NDArray[np.int64]', 'NDArray[np.int64]']":
    """Translate the input prompts into arrays of token ids and attention mask."""
    ...

`transformers`

Integration with the transformers library.

Local runtime calls intentionally bypass outlines.exceptions.normalize_provider_errors().

`TransformerTokenizer`

Bases: Tokenizer

Represents a tokenizer for models in the transformers library.

Source code in outlines/models/transformers.py

class TransformerTokenizer(Tokenizer):
    """Represents a tokenizer for models in the `transformers` library."""

    def __init__(self, tokenizer: "PreTrainedTokenizer", **kwargs):
        self.tokenizer = tokenizer
        self.eos_token_id = self.tokenizer.eos_token_id
        self.eos_token = self.tokenizer.eos_token
        self.get_vocab = self.tokenizer.get_vocab

        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
            self.pad_token_id = self.eos_token_id
        else:
            self.pad_token_id = self.tokenizer.pad_token_id
            self.pad_token = self.tokenizer.pad_token

        self.special_tokens = set(self.tokenizer.all_special_tokens)

        self.vocabulary = self.tokenizer.get_vocab()
        self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types())

    def encode(
        self, prompt: Union[str, List[str]], **kwargs
    ) -> Tuple["torch.LongTensor", "torch.LongTensor"]:
        kwargs["padding"] = True
        kwargs["return_tensors"] = "pt"
        output = self.tokenizer(prompt, **kwargs)
        return output["input_ids"], output["attention_mask"]

    def decode(self, token_ids: "torch.LongTensor") -> List[str]:
        text = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        return text

    def convert_token_to_string(self, token: str) -> str:
        string = self.tokenizer.convert_tokens_to_string([token])

        if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
            return " " + string

        return string

    def __eq__(self, other):
        if isinstance(other, type(self)):
            if hasattr(self, "model_name") and hasattr(self, "kwargs"):
                return (
                    other.model_name == self.model_name and other.kwargs == self.kwargs
                )
            else:
                return other.tokenizer == self.tokenizer
        return NotImplemented

    def __hash__(self):
        from datasets.fingerprint import Hasher

        return hash(Hasher.hash(self.tokenizer))

    def __getstate__(self):
        state = {"tokenizer": self.tokenizer}
        return state

    def __setstate__(self, state):
        self.__init__(state["tokenizer"])

`Transformers`

Bases: Model

Thin wrapper around a transformers model and a transformers tokenizer.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and tokenizer.

Source code in outlines/models/transformers.py

class Transformers(Model):
    """Thin wrapper around a `transformers` model and a `transformers`
    tokenizer.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    tokenizer.

    """

    def __init__(
        self,
        model: "PreTrainedModel",
        tokenizer: "PreTrainedTokenizer",
        *,
        device_dtype: Optional["torch.dtype"] = None,
    ):
        """
        Parameters:
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        tokenizer
            A `PreTrainedTokenizer`, or any tokenizer that is compatible with
            the `transformers` API for tokenizers.
        device_dtype
            The dtype to use for the model. If not provided, the model will use
            the default dtype.

        """
        # We need to handle the cases in which jax/flax or tensorflow
        # is not available in the environment.
        try:
            from transformers import FlaxPreTrainedModel
        except ImportError:  # pragma: no cover
            FlaxPreTrainedModel = None

        try:
            from transformers import TFPreTrainedModel
        except ImportError:  # pragma: no cover
            TFPreTrainedModel = None

        tokenizer.padding_side = "left"
        self.model = model
        self.hf_tokenizer = tokenizer
        self.tokenizer = TransformerTokenizer(tokenizer)
        self.device_dtype = device_dtype
        self.type_adapter = TransformersTypeAdapter(
            tokenizer=tokenizer,
            has_chat_template=_check_hf_chat_template(tokenizer)
        )

        if (
            FlaxPreTrainedModel is not None
            and isinstance(model, FlaxPreTrainedModel)
        ):  # pragma: no cover
            self.tensor_library_name = "jax"
            warnings.warn("""
                Support for `jax` has been deprecated and will be removed in
                version 1.4.0 of Outlines. Please use `torch` instead.
                Transformers models using `jax` do not support structured
                generation.
                """,
                DeprecationWarning,
                stacklevel=2,
            )
        elif (
            TFPreTrainedModel is not None
            and isinstance(model, TFPreTrainedModel)
        ):  # pragma: no cover
            self.tensor_library_name = "tensorflow"
            warnings.warn("""
                Support for `tensorflow` has been deprecated and will be removed in
                version 1.4.0 of Outlines. Please use `torch` instead.
                Transformers models using `tensorflow` do not support structured
                generation.
                """,
                DeprecationWarning,
                stacklevel=2,
            )
        else:
            self.tensor_library_name = "torch"

    def _prepare_model_inputs(
        self,
        model_input,
        is_batch: bool = False,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        # Format validation
        if is_batch:
            prompts = [
                self.type_adapter.format_input(item)
                for item in model_input
            ]
        else:
            prompts = self.type_adapter.format_input(model_input)
        input_ids, attention_mask = self.tokenizer.encode(prompts)
        inputs = {
            "input_ids": input_ids.to(self.model.device),
            "attention_mask": (
                attention_mask.to(self.model.device, dtype=self.device_dtype)
                if self.device_dtype is not None
                else attention_mask.to(self.model.device)
            ),
        }

        return prompts, inputs

    def generate(
        self,
        model_input: Union[str, dict, Chat],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str]]:
        """Generate text using `transformers`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response. For
            multi-modal models, the input should be a dictionary containing the
            `text` key with a value of type `Union[str, List[str]]` and the
            other keys required by the model.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            of the `transformers` model.

        Returns
        -------
        Union[str, List[str]]
            The text generated by the model.

        """
        prompts, inputs = self._prepare_model_inputs(model_input, False)
        logits_processor = self.type_adapter.format_output_type(output_type)

        generated_ids = self._generate_output_seq(
            prompts,
            inputs,
            logits_processor=logits_processor,
            **inference_kwargs,
        )

        # required for multi-modal models that return a 2D tensor even when
        # num_return_sequences is 1
        num_samples = inference_kwargs.get("num_return_sequences", 1)
        if num_samples == 1 and len(generated_ids.shape) == 2:
            generated_ids = generated_ids.squeeze(0)

        return self._decode_generation(generated_ids)

    def generate_batch(
        self,
        model_input: List[Union[str, dict, Chat]],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> List[Union[str, List[str]]]:
        """"""
        prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore
        logits_processor = self.type_adapter.format_output_type(output_type)

        generated_ids = self._generate_output_seq(
            prompts, inputs, logits_processor=logits_processor, **inference_kwargs
        )

        # if there are multiple samples per input, convert generated_id to 3D
        num_samples = inference_kwargs.get("num_return_sequences", 1)
        if num_samples > 1:
            generated_ids = generated_ids.view(len(model_input), num_samples, -1)

        return self._decode_generation(generated_ids)

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `transformers` models.

        TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

        """
        raise NotImplementedError(
            "Streaming is not implemented for Transformers models."
        )

    def _generate_output_seq(self, prompts, inputs, **inference_kwargs):
        input_ids = inputs["input_ids"]

        output_ids = self.model.generate(
            **inputs,
            **inference_kwargs,
        )

        # encoder-decoder returns output_ids only, decoder-only returns full seq ids
        if self.model.config.is_encoder_decoder:
            generated_ids = output_ids
        else:
            generated_ids = output_ids[:, input_ids.shape[1] :]

        return generated_ids

    def _decode_generation(self, generated_ids: "torch.Tensor"):
        if len(generated_ids.shape) == 1:
            return self.tokenizer.decode([generated_ids])[0]
        elif len(generated_ids.shape) == 2:
            return self.tokenizer.decode(generated_ids)
        elif len(generated_ids.shape) == 3:
            return [
                self.tokenizer.decode(generated_ids[i])
                for i in range(len(generated_ids))
            ]
        else:  # pragma: no cover
            raise TypeError(
                "Generated outputs aren't 1D, 2D or 3D, but instead are "
                f"{generated_ids.shape}"
            )

`init(model, tokenizer, *, device_dtype=None)`

Parameters:

model A PreTrainedModel, or any model that is compatible with the transformers API for models. tokenizer A PreTrainedTokenizer, or any tokenizer that is compatible with the transformers API for tokenizers. device_dtype The dtype to use for the model. If not provided, the model will use the default dtype.

Source code in outlines/models/transformers.py

def __init__(
    self,
    model: "PreTrainedModel",
    tokenizer: "PreTrainedTokenizer",
    *,
    device_dtype: Optional["torch.dtype"] = None,
):
    """
    Parameters:
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    tokenizer
        A `PreTrainedTokenizer`, or any tokenizer that is compatible with
        the `transformers` API for tokenizers.
    device_dtype
        The dtype to use for the model. If not provided, the model will use
        the default dtype.

    """
    # We need to handle the cases in which jax/flax or tensorflow
    # is not available in the environment.
    try:
        from transformers import FlaxPreTrainedModel
    except ImportError:  # pragma: no cover
        FlaxPreTrainedModel = None

    try:
        from transformers import TFPreTrainedModel
    except ImportError:  # pragma: no cover
        TFPreTrainedModel = None

    tokenizer.padding_side = "left"
    self.model = model
    self.hf_tokenizer = tokenizer
    self.tokenizer = TransformerTokenizer(tokenizer)
    self.device_dtype = device_dtype
    self.type_adapter = TransformersTypeAdapter(
        tokenizer=tokenizer,
        has_chat_template=_check_hf_chat_template(tokenizer)
    )

    if (
        FlaxPreTrainedModel is not None
        and isinstance(model, FlaxPreTrainedModel)
    ):  # pragma: no cover
        self.tensor_library_name = "jax"
        warnings.warn("""
            Support for `jax` has been deprecated and will be removed in
            version 1.4.0 of Outlines. Please use `torch` instead.
            Transformers models using `jax` do not support structured
            generation.
            """,
            DeprecationWarning,
            stacklevel=2,
        )
    elif (
        TFPreTrainedModel is not None
        and isinstance(model, TFPreTrainedModel)
    ):  # pragma: no cover
        self.tensor_library_name = "tensorflow"
        warnings.warn("""
            Support for `tensorflow` has been deprecated and will be removed in
            version 1.4.0 of Outlines. Please use `torch` instead.
            Transformers models using `tensorflow` do not support structured
            generation.
            """,
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        self.tensor_library_name = "torch"

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using transformers.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[str, dict, Chat]`	The prompt based on which the model will generate a response. For multi-modal models, the input should be a dictionary containing the `text` key with a value of type `Union[str, List[str]]` and the other keys required by the model.	required
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the `generate` method of the `transformers` model.	`{}`

Returns:

Type	Description
`Union[str, List[str]]`	The text generated by the model.

Source code in outlines/models/transformers.py

def generate(
    self,
    model_input: Union[str, dict, Chat],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Generate text using `transformers`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response. For
        multi-modal models, the input should be a dictionary containing the
        `text` key with a value of type `Union[str, List[str]]` and the
        other keys required by the model.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        of the `transformers` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.

    """
    prompts, inputs = self._prepare_model_inputs(model_input, False)
    logits_processor = self.type_adapter.format_output_type(output_type)

    generated_ids = self._generate_output_seq(
        prompts,
        inputs,
        logits_processor=logits_processor,
        **inference_kwargs,
    )

    # required for multi-modal models that return a 2D tensor even when
    # num_return_sequences is 1
    num_samples = inference_kwargs.get("num_return_sequences", 1)
    if num_samples == 1 and len(generated_ids.shape) == 2:
        generated_ids = generated_ids.squeeze(0)

    return self._decode_generation(generated_ids)

`generate_batch(model_input, output_type=None, **inference_kwargs)`

Source code in outlines/models/transformers.py

def generate_batch(
    self,
    model_input: List[Union[str, dict, Chat]],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> List[Union[str, List[str]]]:
    """"""
    prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore
    logits_processor = self.type_adapter.format_output_type(output_type)

    generated_ids = self._generate_output_seq(
        prompts, inputs, logits_processor=logits_processor, **inference_kwargs
    )

    # if there are multiple samples per input, convert generated_id to 3D
    num_samples = inference_kwargs.get("num_return_sequences", 1)
    if num_samples > 1:
        generated_ids = generated_ids.view(len(model_input), num_samples, -1)

    return self._decode_generation(generated_ids)

`generate_stream(model_input, output_type, **inference_kwargs)`

Not available for transformers models.

TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

Source code in outlines/models/transformers.py

def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Not available for `transformers` models.

    TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

    """
    raise NotImplementedError(
        "Streaming is not implemented for Transformers models."
    )

`TransformersMultiModal`

Bases: Transformers

Thin wrapper around a transformers model and a transformers processor.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and processor.

Source code in outlines/models/transformers.py

class TransformersMultiModal(Transformers):
    """Thin wrapper around a `transformers` model and a `transformers`
    processor.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    processor.

    """

    def __init__(
        self,
        model: "PreTrainedModel",
        processor,
        *,
        device_dtype: Optional["torch.dtype"] = None,
    ):
        """Create a TransformersMultiModal model instance

        We rely on the `__init__` method of the `Transformers` class to handle
        most of the initialization and then add elements specific to multimodal
        models.

        Parameters
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        processor
            A `ProcessorMixin` instance.
        device_dtype
            The dtype to use for the model. If not provided, the model will use
            the default dtype.

        """
        self.processor = processor
        self.processor.padding_side = "left"
        self.processor.pad_token = "[PAD]"

        tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer

        super().__init__(model, tokenizer, device_dtype=device_dtype)

        self.type_adapter = TransformersMultiModalTypeAdapter(
            tokenizer=tokenizer
        )

    def _prepare_model_inputs(
        self,
        model_input,
        is_batch: bool = False,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        if is_batch:
            prompts = [
                self.type_adapter.format_input(item) for item in model_input
            ]
        else:
            prompts = self.type_adapter.format_input(model_input)

        # The expected format is a single dict
        if is_batch:
            merged_prompts = defaultdict(list)
            for d in prompts:
                for key, value in d.items():
                    if key == "text":
                        merged_prompts[key].append(value)
                    else:
                        merged_prompts[key].extend(value)
        else:
            merged_prompts = prompts # type: ignore

        inputs = self.processor(
            **merged_prompts, padding=True, return_tensors="pt"
        )
        if self.device_dtype is not None:
            inputs = inputs.to(self.model.device, dtype=self.device_dtype)
        else:
            inputs = inputs.to(self.model.device)

        return merged_prompts["text"], inputs

`init(model, processor, *, device_dtype=None)`

Create a TransformersMultiModal model instance

We rely on the __init__ method of the Transformers class to handle most of the initialization and then add elements specific to multimodal models.

Parameters:

Name	Type	Description	Default
`model`	`PreTrainedModel`	A `PreTrainedModel`, or any model that is compatible with the `transformers` API for models.	required
`processor`		A `ProcessorMixin` instance.	required
`device_dtype`	`Optional[dtype]`	The dtype to use for the model. If not provided, the model will use the default dtype.	`None`

Source code in outlines/models/transformers.py

def __init__(
    self,
    model: "PreTrainedModel",
    processor,
    *,
    device_dtype: Optional["torch.dtype"] = None,
):
    """Create a TransformersMultiModal model instance

    We rely on the `__init__` method of the `Transformers` class to handle
    most of the initialization and then add elements specific to multimodal
    models.

    Parameters
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    processor
        A `ProcessorMixin` instance.
    device_dtype
        The dtype to use for the model. If not provided, the model will use
        the default dtype.

    """
    self.processor = processor
    self.processor.padding_side = "left"
    self.processor.pad_token = "[PAD]"

    tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer

    super().__init__(model, tokenizer, device_dtype=device_dtype)

    self.type_adapter = TransformersMultiModalTypeAdapter(
        tokenizer=tokenizer
    )

`TransformersMultiModalTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for TransformersMultiModal model.

Source code in outlines/models/transformers.py

class TransformersMultiModalTypeAdapter(ModelTypeAdapter):
    """Type adapter for `TransformersMultiModal` model."""

    def __init__(self, **kwargs):
        self.tokenizer = kwargs.get("tokenizer")

    @singledispatchmethod
    def format_input(self, model_input):
        """Fomat the prompt arguments to pass to the model.

        Argument
        --------
        model_input
            The input passed by the user.

        Returns
        -------
        dict
            The formatted input.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available. Please "
            + "provide a list containing a text prompt and assets "
            + "(`Image`, `Audio` or `Video` instances) supported by your "
            + "model or a `Chat` instance."
        )

    @format_input.register(Chat)
    def format_chat_input(self, model_input: Chat) -> dict:
        conversation = []
        assets = []

        # process each message, convert if needed to standardized multimodal chat template format
        # and collect assets for HF processor
        for message in model_input.messages:
            processed_message, message_assets = self._prepare_message(
                message["role"], message["content"]
            )
            conversation.append(processed_message)
            assets.extend(message_assets)

        formatted_prompt = self.tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True
        )
        # use the formatted prompt and the assets to format the input
        return self.format_list_input([formatted_prompt, *assets])

    def _prepare_message(self, role: str, content: str | list) -> tuple[dict, list]:
        """Create a message."""
        if isinstance(content, str):
            return {"role": role, "content": content}, []

        elif isinstance(content, list):
            if all(isinstance(item, dict) for item in content): # HF multimodal chat template
                return {"role": role, "content": content}, self._extract_assets_from_content(content)
            else: # list of string + assets
                prompt = content[0]
                assets = content[1:]
                assets_dict = [self._format_asset_for_template(asset) for asset in assets]

                return {"role": role, "content": [
                    {"type": "text", "text": prompt},
                    *assets_dict
                ]}, assets
        else:
            raise ValueError(
                f"Invalid content type: {type(content)}. "
                + "The content must be a string or a list containing text and assets "
                + "or a list of dict items with explicit types."
            )

    def _extract_assets_from_content(self, content: list) -> list:
        """Process a list of dict items."""
        assets = []

        for item in content:
            if len(item) > 2:
                raise ValueError(
                    f"Found item with multiple keys: {item}. "
                    + "Each item in the content list must be a dictionary with a 'type' key and a single asset key. "
                    + "To include multiple assets, use separate dictionary items. "
                    + "For example: [{{'type': 'image', 'image': image1}}, {{'type': 'image', 'image': image2}}]. "
                )

            if "type" not in item:
                raise ValueError(
                    "Each item in the content list must be a dictionary with a 'type' key. "
                    + "Valid types are 'text', 'image', 'video', or 'audio'. "
                    + "For instance {{'type': 'text', 'text': 'your message'}}. "
                    + f"Found item without 'type' key: {item}"
                )
            if item["type"] == "text":
                continue
            elif item["type"] in ["image", "video", "audio"]:
                asset_key = item["type"]
                if asset_key not in item:
                    raise ValueError(
                        f"Item with type '{asset_key}' must contain a '{asset_key}' key. "
                        + f"Found item: {item}"
                    )
                if isinstance(item[asset_key], (Image, Video, Audio)):
                    assets.append(item[asset_key])
                else:
                    raise ValueError(
                        "Assets must be of type `Image`, `Video` or `Audio`. "
                        + f"Unsupported asset type: {type(item[asset_key])}"
                    )
            else:
                raise ValueError(
                    "Content must be 'text', 'image', 'video' or 'audio'. "
                    + f"Unsupported content type: {item['type']}")
        return assets

    def _format_asset_for_template(self, asset: Image | Video | Audio) -> dict:
        """Process an asset."""
        if isinstance(asset, Image):
            return {"type": "image", "image": asset}
        elif isinstance(asset, Video):
            return {"type": "video", "video": asset}
        elif isinstance(asset, Audio):
            return {"type": "audio", "audio": asset}
        else:
            raise ValueError(
                "Assets must be of type `Image`, `Video` or `Audio`. "
                + f"Unsupported asset type: {type(asset)}"
            )

    @format_input.register(list)
    def format_list_input(self, model_input: list) -> dict:
        prompt = model_input[0]
        assets = model_input[1:]

        if not assets:  # handle empty assets case
            return {"text": prompt}

        asset_types = set(type(asset) for asset in assets)
        if len(asset_types) > 1:
            raise ValueError(
                "All assets must be of the same type. "
                + f"Found types: {asset_types}"
            )
        asset_type = asset_types.pop()

        if asset_type == Image:
            return {
                "text": prompt,
                "images": [asset.image for asset in assets]
            }
        elif asset_type == Audio: # pragma: no cover
            return {
                "text": prompt,
                "audio": [asset.audio for asset in assets]
            }
        elif asset_type == Video: # pragma: no cover
            return {
                "text": prompt,
                "videos": [asset.video for asset in assets]
            }
        else:
            raise ValueError(f"Unsupported asset type: {asset_type}")

    def format_output_type(
        self,
        output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional["LogitsProcessorList"]:
        """Generate the logits processor argument to pass to the model.

        Argument
        --------
        output_type
            The logits processor provided.

        Returns
        -------
        Optional[LogitsProcessorList]
            The logits processor to pass to the model.

        """
        from transformers import LogitsProcessorList

        if output_type is not None:
            return LogitsProcessorList([output_type])
        return None

`format_input(model_input)`

Fomat the prompt arguments to pass to the model.

Argument

model_input The input passed by the user.

Returns:

Type	Description
`dict`	The formatted input.

Source code in outlines/models/transformers.py

@singledispatchmethod
def format_input(self, model_input):
    """Fomat the prompt arguments to pass to the model.

    Argument
    --------
    model_input
        The input passed by the user.

    Returns
    -------
    dict
        The formatted input.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available. Please "
        + "provide a list containing a text prompt and assets "
        + "(`Image`, `Audio` or `Video` instances) supported by your "
        + "model or a `Chat` instance."
    )

`format_output_type(output_type=None)`

Generate the logits processor argument to pass to the model.

Argument

output_type The logits processor provided.

Returns:

Type	Description
`Optional[LogitsProcessorList]`	The logits processor to pass to the model.

Source code in outlines/models/transformers.py

def format_output_type(
    self,
    output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional["LogitsProcessorList"]:
    """Generate the logits processor argument to pass to the model.

    Argument
    --------
    output_type
        The logits processor provided.

    Returns
    -------
    Optional[LogitsProcessorList]
        The logits processor to pass to the model.

    """
    from transformers import LogitsProcessorList

    if output_type is not None:
        return LogitsProcessorList([output_type])
    return None

`TransformersTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the Transformers model.

Source code in outlines/models/transformers.py

class TransformersTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `Transformers` model."""

    def __init__(self, tokenizer: "PreTrainedTokenizer", has_chat_template: bool = False):
        self.tokenizer = tokenizer
        self.has_chat_template = has_chat_template

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Parameters
        ----------
        model_input
            The input passed by the user.

        Returns
        -------
        str
            The formatted input to be passed to the model.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available."
            "The only available types are `str` and `Chat`."
        )

    @format_input.register(str)
    def format_str_input(self, model_input: str) -> str:
        if self.has_chat_template:
            return self.format_chat_input(Chat([{"role": "user", "content": model_input}]))
        return model_input

    @format_input.register(Chat)
    def format_chat_input(self, model_input: Chat) -> str:
        return self.tokenizer.apply_chat_template(
            model_input.messages,
            tokenize=False,
            add_generation_prompt=True,
        )

    def format_output_type(
        self,
        output_type: Optional[OutlinesLogitsProcessor] = None,
    ) -> Optional["LogitsProcessorList"]:
        """Generate the logits processor argument to pass to the model.

        Parameters
        ----------
        output_type
            The logits processor provided.

        Returns
        -------
        Optional[LogitsProcessorList]
            The logits processor to pass to the model.

        """
        from transformers import LogitsProcessorList

        if output_type is not None:
            return LogitsProcessorList([output_type])
        return None

`format_input(model_input)`

Generate the prompt argument to pass to the model.

Parameters:

Name	Type	Description	Default
`model_input`		The input passed by the user.	required

Returns:

Type	Description
`str`	The formatted input to be passed to the model.

Source code in outlines/models/transformers.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available."
        "The only available types are `str` and `Chat`."
    )

`format_output_type(output_type=None)`

Generate the logits processor argument to pass to the model.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[OutlinesLogitsProcessor]`	The logits processor provided.	`None`

Returns:

Type	Description
`Optional[LogitsProcessorList]`	The logits processor to pass to the model.

Source code in outlines/models/transformers.py

def format_output_type(
    self,
    output_type: Optional[OutlinesLogitsProcessor] = None,
) -> Optional["LogitsProcessorList"]:
    """Generate the logits processor argument to pass to the model.

    Parameters
    ----------
    output_type
        The logits processor provided.

    Returns
    -------
    Optional[LogitsProcessorList]
        The logits processor to pass to the model.

    """
    from transformers import LogitsProcessorList

    if output_type is not None:
        return LogitsProcessorList([output_type])
    return None

`from_transformers(model, tokenizer_or_processor, *, device_dtype=None)`

Create an Outlines Transformers or TransformersMultiModal model instance from a PreTrainedModel instance and a PreTrainedTokenizer or ProcessorMixin instance.

outlines supports PreTrainedModelForCausalLM, PreTrainedMambaForCausalLM, PreTrainedModelForSeq2Seq and any model that implements the transformers model API.

Parameters:

Name	Type	Description	Default
`model`	`PreTrainedModel`	A `transformers.PreTrainedModel` instance.	required
`tokenizer_or_processor`	`Union[PreTrainedTokenizer, ProcessorMixin]`	A `transformers.PreTrainedTokenizer` or `transformers.ProcessorMixin` instance.	required
`device_dtype`	`Optional[dtype]`	The dtype to use for the model. If not provided, the model will use the default dtype.	`None`

Returns:

Type	Description
`Union[Transformers, TransformersMultiModal]`	An Outlines `Transformers` or `TransformersMultiModal` model instance.

Source code in outlines/models/transformers.py

def from_transformers(
    model: "PreTrainedModel",
    tokenizer_or_processor: Union["PreTrainedTokenizer", "ProcessorMixin"],
    *,
    device_dtype: Optional["torch.dtype"] = None,
) -> Union[Transformers, TransformersMultiModal]:
    """Create an Outlines `Transformers` or `TransformersMultiModal` model
    instance from a `PreTrainedModel` instance and a `PreTrainedTokenizer` or
    `ProcessorMixin` instance.

    `outlines` supports `PreTrainedModelForCausalLM`,
    `PreTrainedMambaForCausalLM`, `PreTrainedModelForSeq2Seq` and any model
    that implements the `transformers` model API.

    Parameters
    ----------
    model
        A `transformers.PreTrainedModel` instance.
    tokenizer_or_processor
        A `transformers.PreTrainedTokenizer` or
        `transformers.ProcessorMixin` instance.
    device_dtype
        The dtype to use for the model. If not provided, the model will use
        the default dtype.

    Returns
    -------
    Union[Transformers, TransformersMultiModal]
        An Outlines `Transformers` or `TransformersMultiModal` model instance.

    """
    from transformers import (
        PreTrainedTokenizer, PreTrainedTokenizerFast, ProcessorMixin)

    if isinstance(
        tokenizer_or_processor, (PreTrainedTokenizer, PreTrainedTokenizerFast)
    ):
        tokenizer = tokenizer_or_processor
        return Transformers(model, tokenizer, device_dtype=device_dtype)
    elif isinstance(tokenizer_or_processor, ProcessorMixin):
        processor = tokenizer_or_processor
        return TransformersMultiModal(model, processor, device_dtype=device_dtype)
    else:
        raise ValueError(
            "We couldn't determine whether the model passed to `from_transformers`"
            + " is a text-2-text or a multi-modal model. Please provide a "
            + "a transformers tokenizer or processor."
        )

`get_llama_tokenizer_types()`

Get all the Llama tokenizer types/classes that need work-arounds.

When they can't be imported, a dummy class is created.

Source code in outlines/models/transformers.py

def get_llama_tokenizer_types():
    """Get all the Llama tokenizer types/classes that need work-arounds.

    When they can't be imported, a dummy class is created.

    """
    try:
        from transformers.models.llama import LlamaTokenizer
    except ImportError:  # pragma: no cover

        class LlamaTokenizer:  # type: ignore
            pass

    try:
        from transformers.models.llama import LlamaTokenizerFast
    except ImportError:  # pragma: no cover

        class LlamaTokenizerFast:  # type: ignore
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizer
    except ImportError:  # pragma: no cover

        class CodeLlamaTokenizer:  # type: ignore
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizerFast
    except ImportError:  # pragma: no cover

        class CodeLlamaTokenizerFast:  # type: ignore
            pass

    return (
        LlamaTokenizer,
        LlamaTokenizerFast,
        CodeLlamaTokenizer,
        CodeLlamaTokenizerFast,
    )

`utils`

`set_additional_properties_false_json_schema(schema)`

Set additionalProperties to False to all objects in the schema using jsonpath.

Parameters:

Name	Type	Description	Default
`schema`	`dict`	The JSON schema to modify	required

Returns:

Type	Description
`dict`	The modified schema with additionalProperties set to False

Source code in outlines/models/utils.py

def set_additional_properties_false_json_schema(schema: dict) -> dict:
    """Set additionalProperties to False to all objects in the schema using jsonpath.

    Parameters
    ----------
    schema
        The JSON schema to modify

    Returns
    -------
    dict
        The modified schema with additionalProperties set to False
    """
    # Get all nodes
    jsonpath_expr = jsonpath_ng.parse('$..*')
    matches = jsonpath_expr.find(schema)

    # Go over all nodes and set additionalProperties to False if it's an object
    for match in matches:
        if match.value == 'object':
            if 'additionalProperties' not in match.context.value:
                match.context.value['additionalProperties'] = False

    return schema

`vllm`

Integration with a vLLM server.

`AsyncVLLM`

Bases: AsyncModel

Thin async wrapper around the openai.OpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the vllm server.

Source code in outlines/models/vllm.py

class AsyncVLLM(AsyncModel):
    """Thin async wrapper around the `openai.OpenAI` client used to communicate
    with a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    `vllm` server.
    """

    def __init__(
        self,
        client: "AsyncOpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = await self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise GenerationError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("VLLM does not support batch inference.")

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.
        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = await self.client.chat.completions.create(
                **client_args,
                stream=True,
            )
            async for chunk in stream:  # pragma: no cover
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`AsyncOpenAI`	An `openai.AsyncOpenAI` client instance.	required

Source code in outlines/models/vllm.py

def __init__(
    self,
    client: "AsyncOpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)` `async`

Generate text using vLLM.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/vllm.py

async def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = await self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise GenerationError(
                f"The vLLM server refused to answer the request: "
                f"{message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

Stream text using vLLM.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`AsyncIterator[str]`	An async iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py

async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.
    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )
        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`VLLM`

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the vllm server.

Source code in outlines/models/vllm.py

class VLLM(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    `vllm` server.
    """

    def __init__(
        self,
        client: "OpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            response = self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise GenerationError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}",
                    provider=PROVIDER,
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        raise NotImplementedError("VLLM does not support batch inference.")

    def generate_stream(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        with normalize_provider_errors(PROVIDER):
            stream = self.client.chat.completions.create(
                **client_args, stream=True,
            )
            for chunk in stream:  # pragma: no cover
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args

`init(client, model_name=None)`

Parameters:

Name	Type	Description	Default
`client`	`OpenAI`	An `openai.OpenAI` client instance.	required

Source code in outlines/models/vllm.py

def __init__(
    self,
    client: "OpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using vLLM.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Union[str, list[str]]`	The text generated by the model.

Source code in outlines/models/vllm.py

def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input,
        output_type,
        **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        response = self.client.chat.completions.create(**client_args)

    messages = [choice.message for choice in response.choices]
    for message in messages:
        if message.refusal is not None:  # pragma: no cover
            raise GenerationError(
                f"The vLLM server refused to answer the request: "
                f"{message.refusal}",
                provider=PROVIDER,
            )

    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

`generate_stream(model_input, output_type=None, **inference_kwargs)`

Stream text using vLLM.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the client.	`{}`

Returns:

Type	Description
`Iterator[str]`	An iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py

def generate_stream(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using vLLM.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    with normalize_provider_errors(PROVIDER):
        stream = self.client.chat.completions.create(
            **client_args, stream=True,
        )
        for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

`VLLMTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the VLLM and AsyncVLLM models.

Source code in outlines/models/vllm.py

class VLLMTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `VLLM` and `AsyncVLLM` models."""

    def format_input(self, model_input: Union[Chat, str, list]) -> list:
        """Generate the value of the messages argument to pass to the client.

        We rely on the OpenAITypeAdapter to format the input as the vLLM server
        expects input in the same format as OpenAI.

        Parameters
        ----------
        model_input
            The input passed by the user.

        Returns
        -------
        list
            The formatted input to be passed to the model.

        """
        return OpenAITypeAdapter().format_input(model_input)

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the client.

        Parameters
        ----------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The structured output argument to pass to the model.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            return {"guided_grammar": term.definition}
        elif isinstance(term, JsonSchema):
            extra_body = {"guided_json": json.loads(term.schema)}
            if term.whitespace_pattern:
                extra_body["whitespace_pattern"] = term.whitespace_pattern
            return extra_body
        else:
            return {"guided_regex": to_regex(term)}

`format_input(model_input)`

Generate the value of the messages argument to pass to the client.

We rely on the OpenAITypeAdapter to format the input as the vLLM server expects input in the same format as OpenAI.

Parameters:

Name	Type	Description	Default
`model_input`	`Union[Chat, str, list]`	The input passed by the user.	required

Returns:

Type	Description
`list`	The formatted input to be passed to the model.

Source code in outlines/models/vllm.py

def format_input(self, model_input: Union[Chat, str, list]) -> list:
    """Generate the value of the messages argument to pass to the client.

    We rely on the OpenAITypeAdapter to format the input as the vLLM server
    expects input in the same format as OpenAI.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    list
        The formatted input to be passed to the model.

    """
    return OpenAITypeAdapter().format_input(model_input)

`format_output_type(output_type=None)`

Generate the structured output argument to pass to the client.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The structured output type provided.	`None`

Returns:

Type	Description
`dict`	The structured output argument to pass to the model.

Source code in outlines/models/vllm.py

def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the client.

    Parameters
    ----------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The structured output argument to pass to the model.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        return {"guided_grammar": term.definition}
    elif isinstance(term, JsonSchema):
        extra_body = {"guided_json": json.loads(term.schema)}
        if term.whitespace_pattern:
            extra_body["whitespace_pattern"] = term.whitespace_pattern
        return extra_body
    else:
        return {"guided_regex": to_regex(term)}

`from_vllm(client, model_name=None)`

Create an Outlines VLLM or AsyncVLLM model instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name	Type	Description	Default
`client`	`Union[OpenAI, AsyncOpenAI]`	An `openai.OpenAI` or `openai.AsyncOpenAI` instance.	required
`model_name`	`Optional[str]`	The name of the model to use.	`None`

Returns:

Type	Description
`Union[VLLM, AsyncVLLM]`	An Outlines `VLLM` or `AsyncVLLM` model instance.

Source code in outlines/models/vllm.py

def from_vllm(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[VLLM, AsyncVLLM]:
    """Create an Outlines `VLLM` or `AsyncVLLM` model instance from an
    `openai.OpenAI` or `openai.AsyncOpenAI` instance.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[VLLM, AsyncVLLM]
        An Outlines `VLLM` or `AsyncVLLM` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    if isinstance(client, OpenAI):
        return VLLM(client, model_name)
    elif isinstance(client, AsyncOpenAI):
        return AsyncVLLM(client, model_name)
    else:
        raise ValueError(
            f"Unsupported client type: {type(client)}.\n"
            "Please provide an OpenAI or AsyncOpenAI instance."
        )

`vllm_offline`

Integration with the vllm library (offline mode).

Local runtime calls intentionally bypass outlines.exceptions.normalize_provider_errors().

`VLLMOffline`

Bases: Model

Thin wrapper around a vllm.LLM model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the vllm.LLM model.

Source code in outlines/models/vllm_offline.py

class VLLMOffline(Model):
    """Thin wrapper around a `vllm.LLM` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `vllm.LLM` model.

    """

    def __init__(self, model: "LLM"):
        """Create a VLLM model instance.

        Parameters
        ----------
        model
            A `vllm.LLM` model instance.

        """
        self.model = model
        self.tokenizer = self.model.get_tokenizer()
        self.type_adapter = VLLMOfflineTypeAdapter(has_chat_template=self._check_chat_template())

    def _build_generation_args(
        self,
        inference_kwargs: dict,
        output_type: Optional[Any] = None,
    ) -> "SamplingParams":
        """Create the `SamplingParams` object to pass to the `generate` method
        of the `vllm.LLM` model."""
        from vllm.sampling_params import StructuredOutputsParams, SamplingParams

        sampling_params = inference_kwargs.pop("sampling_params", None)

        if sampling_params is None:
            sampling_params = SamplingParams()

        output_type_args = self.type_adapter.format_output_type(output_type)
        if output_type_args:
            original_sampling_params_dict = {f: getattr(sampling_params, f) for f in sampling_params.__struct_fields__}
            sampling_params_dict = {**original_sampling_params_dict, "structured_outputs": StructuredOutputsParams(**output_type_args)}
            sampling_params = SamplingParams(**sampling_params_dict)

        return sampling_params

    def generate(
        self,
        model_input: Chat | str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str]]:
        """Generate text using vLLM offline.

        Parameters
        ----------
        prompt
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            in the `vllm.LLM` model.

        Returns
        -------
        Union[str, List[str]]
            The text generated by the model.

        """
        sampling_params = self._build_generation_args(
            inference_kwargs,
            output_type,
        )

        model_input = self.type_adapter.format_input(model_input)

        if isinstance(model_input, list):
            results = self.model.chat(
                messages=model_input,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        else:
            results = self.model.generate(
                prompts=model_input,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        results = [completion.text for completion in results[0].outputs]

        if len(results) == 1:
            return results[0]
        else:
            return results

    def generate_batch(
        self,
        model_input: List[Chat | str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[List[str], List[List[str]]]:
        """Generate a batch of completions using vLLM offline.

        Parameters
        ----------
        prompt
            The list of prompts based on which the model will generate a
            response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            in the `vllm.LLM` model.

        Returns
        -------
        Union[List[str], List[List[str]]]
            The text generated by the model.

        """
        sampling_params = self._build_generation_args(
            inference_kwargs,
            output_type,
        )

        model_inputs = [self.type_adapter.format_input(item) for item in model_input]

        if model_inputs and isinstance(model_inputs[0], list):
            results = self.model.chat(
                messages=model_inputs,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        else:
            results = self.model.generate(
                prompts=model_inputs,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        return [[sample.text for sample in batch.outputs] for batch in results]

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `vllm.LLM`.

        TODO: Implement the streaming functionality ourselves.

        """
        raise NotImplementedError(
            "Streaming is not available for the vLLM offline integration."
        )

    def _check_chat_template(self) -> bool:
        """Check if the tokenizer has a chat template."""
        # 1. Try HuggingFace-style chat template check (get_chat_template).
        # Only return early on True; on False or any exception fall through to
        # step 2 so that vLLM-style tokenizers are still handled correctly.
        if hasattr(self.tokenizer, "chat_template") or hasattr(self.tokenizer, "apply_chat_template"):
            try:
                from outlines.models.tokenizer import _check_hf_chat_template
                if _check_hf_chat_template(self.tokenizer):
                    return True
            except Exception:
                pass

        # 2. Try vLLM-style apply_chat_template (works for old and new vLLM).
        if hasattr(self.tokenizer, "apply_chat_template"):
            try:
                self.tokenizer.apply_chat_template([{"role": "user", "content": "test"}])
                return True
            except Exception:
                pass

        # 3. Default: no chat template
        return False

`init(model)`

Create a VLLM model instance.

Parameters:

Name	Type	Description	Default
`model`	`LLM`	A `vllm.LLM` model instance.	required

Source code in outlines/models/vllm_offline.py

def __init__(self, model: "LLM"):
    """Create a VLLM model instance.

    Parameters
    ----------
    model
        A `vllm.LLM` model instance.

    """
    self.model = model
    self.tokenizer = self.model.get_tokenizer()
    self.type_adapter = VLLMOfflineTypeAdapter(has_chat_template=self._check_chat_template())

`generate(model_input, output_type=None, **inference_kwargs)`

Generate text using vLLM offline.

Parameters:

Name	Type	Description	Default
`prompt`		The prompt based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the `generate` method in the `vllm.LLM` model.	`{}`

Returns:

Type	Description
`Union[str, List[str]]`	The text generated by the model.

Source code in outlines/models/vllm_offline.py

def generate(
    self,
    model_input: Chat | str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Generate text using vLLM offline.

    Parameters
    ----------
    prompt
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.

    """
    sampling_params = self._build_generation_args(
        inference_kwargs,
        output_type,
    )

    model_input = self.type_adapter.format_input(model_input)

    if isinstance(model_input, list):
        results = self.model.chat(
            messages=model_input,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    else:
        results = self.model.generate(
            prompts=model_input,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    results = [completion.text for completion in results[0].outputs]

    if len(results) == 1:
        return results[0]
    else:
        return results

`generate_batch(model_input, output_type=None, **inference_kwargs)`

Generate a batch of completions using vLLM offline.

Parameters:

Name	Type	Description	Default
`prompt`		The list of prompts based on which the model will generate a response.	required
`output_type`	`Optional[Any]`	The logits processor the model will use to constrain the format of the generated text.	`None`
`inference_kwargs`	`Any`	Additional keyword arguments to pass to the `generate` method in the `vllm.LLM` model.	`{}`

Returns:

Type	Description
`Union[List[str], List[List[str]]]`	The text generated by the model.

Source code in outlines/models/vllm_offline.py

def generate_batch(
    self,
    model_input: List[Chat | str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[List[str], List[List[str]]]:
    """Generate a batch of completions using vLLM offline.

    Parameters
    ----------
    prompt
        The list of prompts based on which the model will generate a
        response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[List[str], List[List[str]]]
        The text generated by the model.

    """
    sampling_params = self._build_generation_args(
        inference_kwargs,
        output_type,
    )

    model_inputs = [self.type_adapter.format_input(item) for item in model_input]

    if model_inputs and isinstance(model_inputs[0], list):
        results = self.model.chat(
            messages=model_inputs,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    else:
        results = self.model.generate(
            prompts=model_inputs,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    return [[sample.text for sample in batch.outputs] for batch in results]

`generate_stream(model_input, output_type, **inference_kwargs)`

Not available for vllm.LLM.

TODO: Implement the streaming functionality ourselves.

Source code in outlines/models/vllm_offline.py

def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Not available for `vllm.LLM`.

    TODO: Implement the streaming functionality ourselves.

    """
    raise NotImplementedError(
        "Streaming is not available for the vLLM offline integration."
    )

`VLLMOfflineTypeAdapter`

Bases: ModelTypeAdapter

Type adapter for the VLLMOffline model.

Source code in outlines/models/vllm_offline.py

class VLLMOfflineTypeAdapter(ModelTypeAdapter):
    """Type adapter for the `VLLMOffline` model."""

    def __init__(self, has_chat_template: bool = False):
        self.has_chat_template = has_chat_template

    @singledispatchmethod
    def format_input(self, model_input):
        """Generate the prompt argument to pass to the model.

        Argument
        --------
        model_input
            The input passed by the user.

        """
        raise TypeError(
            f"The input type {type(model_input)} is not available with "
            "VLLM offline. The only available types are `str` and "
            "`Chat` (containing a prompt and images)."
        )

    @format_input.register(str)
    def format_input_str(self, model_input: str) -> str | list:
        """Format a `str` input.

        """
        if self.has_chat_template:
            return self.format_input_chat(Chat([{"role": "user", "content": model_input}]))
        return model_input

    @format_input.register(Chat)
    def format_input_chat(self, model_input: Chat) -> list:
        """Format a `Chat` input.

        """
        for message in model_input.messages:
            content = message["content"]
            if isinstance(content, list):
                raise ValueError(
                    "Assets are not supported for vLLM offline."
                    "Please only use text content in the `Chat` input."
                )
        return OpenAITypeAdapter().format_input(model_input)

    def format_output_type(self, output_type: Optional[Any] = None) -> dict:
        """Generate the structured output argument to pass to the model.

        For vLLM, the structured output definition is set in the
        `GuidedDecodingParams` constructor that is provided as a value to the
        `guided_decoding` parameter of the `SamplingParams` constructor, itself
        provided as a value to the `sampling_params` parameter of the `generate`
        method.

        Parameters
        ----------
        output_type
            The structured output type provided.

        Returns
        -------
        dict
            The arguments to provide to the `GuidedDecodingParams` constructor.

        """
        if output_type is None:
            return {}

        term = python_types_to_terms(output_type)
        if isinstance(term, CFG):
            return {"grammar": term.definition}
        elif isinstance(term, JsonSchema):
            guided_decoding_params = {"json": json.loads(term.schema)}
            if term.whitespace_pattern:
                guided_decoding_params["whitespace_pattern"] = term.whitespace_pattern
            return guided_decoding_params
        else:
            return {"regex": to_regex(term)}

`format_input(model_input)`

Generate the prompt argument to pass to the model.

Argument

model_input The input passed by the user.

Source code in outlines/models/vllm_offline.py

@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Argument
    --------
    model_input
        The input passed by the user.

    """
    raise TypeError(
        f"The input type {type(model_input)} is not available with "
        "VLLM offline. The only available types are `str` and "
        "`Chat` (containing a prompt and images)."
    )

`format_input_chat(model_input)`

Format a Chat input.

Source code in outlines/models/vllm_offline.py

@format_input.register(Chat)
def format_input_chat(self, model_input: Chat) -> list:
    """Format a `Chat` input.

    """
    for message in model_input.messages:
        content = message["content"]
        if isinstance(content, list):
            raise ValueError(
                "Assets are not supported for vLLM offline."
                "Please only use text content in the `Chat` input."
            )
    return OpenAITypeAdapter().format_input(model_input)

`format_input_str(model_input)`

Format a str input.

Source code in outlines/models/vllm_offline.py

@format_input.register(str)
def format_input_str(self, model_input: str) -> str | list:
    """Format a `str` input.

    """
    if self.has_chat_template:
        return self.format_input_chat(Chat([{"role": "user", "content": model_input}]))
    return model_input

`format_output_type(output_type=None)`

Generate the structured output argument to pass to the model.

For vLLM, the structured output definition is set in the GuidedDecodingParams constructor that is provided as a value to the guided_decoding parameter of the SamplingParams constructor, itself provided as a value to the sampling_params parameter of the generate method.

Parameters:

Name	Type	Description	Default
`output_type`	`Optional[Any]`	The structured output type provided.	`None`

Returns:

Type	Description
`dict`	The arguments to provide to the `GuidedDecodingParams` constructor.

Source code in outlines/models/vllm_offline.py

def format_output_type(self, output_type: Optional[Any] = None) -> dict:
    """Generate the structured output argument to pass to the model.

    For vLLM, the structured output definition is set in the
    `GuidedDecodingParams` constructor that is provided as a value to the
    `guided_decoding` parameter of the `SamplingParams` constructor, itself
    provided as a value to the `sampling_params` parameter of the `generate`
    method.

    Parameters
    ----------
    output_type
        The structured output type provided.

    Returns
    -------
    dict
        The arguments to provide to the `GuidedDecodingParams` constructor.

    """
    if output_type is None:
        return {}

    term = python_types_to_terms(output_type)
    if isinstance(term, CFG):
        return {"grammar": term.definition}
    elif isinstance(term, JsonSchema):
        guided_decoding_params = {"json": json.loads(term.schema)}
        if term.whitespace_pattern:
            guided_decoding_params["whitespace_pattern"] = term.whitespace_pattern
        return guided_decoding_params
    else:
        return {"regex": to_regex(term)}

`from_vllm_offline(model)`

Create an Outlines VLLMOffline model instance from a vllm.LLM instance.

Parameters:

Name	Type	Description	Default
`model`	`LLM`	A `vllm.LLM` instance.	required

Returns:

Type	Description
`VLLMOffline`	An Outlines `VLLMOffline` model instance.

Source code in outlines/models/vllm_offline.py

def from_vllm_offline(model: "LLM") -> VLLMOffline:
    """Create an Outlines `VLLMOffline` model instance from a `vllm.LLM`
    instance.

    Parameters
    ----------
    model
        A `vllm.LLM` instance.

    Returns
    -------
    VLLMOffline
        An Outlines `VLLMOffline` model instance.

    """
    return VLLMOffline(model)

models

Anthropic

__init__(client, model_name=None)

generate(model_input, output_type=None, **inference_kwargs)

generate_stream(model_input, output_type=None, **inference_kwargs)

AsyncDottxt

__init__(client, model=None)

generate(model_input, output_type=None, **inference_kwargs) async

AsyncLMStudio

__init__(client, model_name=None)

close() async

generate(model_input, output_type=None, **kwargs) async

generate_stream(model_input, output_type=None, **kwargs) async

AsyncMistral

__init__(client, model_name=None)

generate(model_input, output_type=None, **inference_kwargs) async

generate_stream(model_input, output_type=None, **inference_kwargs) async

AsyncOllama

__init__(client, model_name=None)

generate(model_input, output_type=None, **kwargs) async

generate_stream(model_input, output_type=None, **kwargs) async

AsyncOpenAI

__init__(client, model_name=None)

generate(model_input, output_type=None, **inference_kwargs) async

generate_stream(model_input, output_type=None, **inference_kwargs) async

AsyncSGLang

__init__(client, model_name=None)

generate(model_input, output_type=None, **inference_kwargs) async

generate_stream(model_input, output_type=None, **inference_kwargs) async

AsyncTGI

__init__(client)

generate(model_input, output_type=None, **inference_kwargs) async

generate_stream(model_input, output_type=None, **inference_kwargs) async

AsyncVLLM

__init__(client, model_name=None)

generate(model_input, output_type=None, **inference_kwargs) async

generate_stream(model_input, output_type=None, **inference_kwargs) async

Dottxt

__init__(client, model=None)

generate(model_input, output_type=None, **inference_kwargs)

Gemini

__init__(client, model_name=None)

generate(model_input, output_type=None, **inference_kwargs)

generate_stream(model_input, output_type=None, **inference_kwargs)

LMStudio

__init__(client, model_name=None)

generate(model_input, output_type=None, **kwargs)

generate_stream(model_input, output_type=None, **kwargs)

LlamaCpp

__init__(model, chat_mode=True)

generate(model_input, output_type=None, **inference_kwargs)

generate_stream(model_input, output_type=None, **inference_kwargs)

MLXLM

__init__(model, tokenizer)

generate(model_input, output_type=None, **kwargs)

generate_batch(model_input, output_type=None, **kwargs)

generate_stream(model_input, output_type=None, **kwargs)

Mistral

__init__(client, model_name=None)

generate(model_input, output_type=None, **inference_kwargs)

generate_stream(model_input, output_type=None, **inference_kwargs)

Model

__call__(model_input, output_type=None, backend=None, **inference_kwargs)

batch(model_input, output_type=None, backend=None, **inference_kwargs)

generate(model_input, output_type=None, **inference_kwargs) abstractmethod

generate_batch(model_input, output_type=None, **inference_kwargs) abstractmethod

generate_stream(model_input, output_type=None, **inference_kwargs) abstractmethod

stream(model_input, output_type=None, backend=None, **inference_kwargs)

ModelTypeAdapter

format_input(model_input) abstractmethod

format_output_type(output_type=None) abstractmethod

Ollama

__init__(client, model_name=None)

generate(model_input, output_type=None, **kwargs)

generate_stream(model_input, output_type=None, **kwargs)

OpenAI

__init__(client, model_name=None)

generate(model_input, output_type=None, **inference_kwargs)

generate_stream(model_input, output_type=None, **inference_kwargs)

SGLang

`Anthropic`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **inference_kwargs)`

`generate_stream(model_input, output_type=None, **inference_kwargs)`

`AsyncDottxt`

`init(client, model=None)`

`generate(model_input, output_type=None, **inference_kwargs)` `async`

`AsyncLMStudio`

`init(client, model_name=None)`

`close()` `async`

`generate(model_input, output_type=None, **kwargs)` `async`

`generate_stream(model_input, output_type=None, **kwargs)` `async`

`AsyncMistral`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **inference_kwargs)` `async`

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

`AsyncOllama`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **kwargs)` `async`

`generate_stream(model_input, output_type=None, **kwargs)` `async`

`AsyncOpenAI`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **inference_kwargs)` `async`

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

`AsyncSGLang`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **inference_kwargs)` `async`

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

`AsyncTGI`

`init(client)`

`generate(model_input, output_type=None, **inference_kwargs)` `async`

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

`AsyncVLLM`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **inference_kwargs)` `async`

`generate_stream(model_input, output_type=None, **inference_kwargs)` `async`

`Dottxt`

`init(client, model=None)`

`generate(model_input, output_type=None, **inference_kwargs)`

`Gemini`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **inference_kwargs)`

`generate_stream(model_input, output_type=None, **inference_kwargs)`

`LMStudio`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **kwargs)`

`generate_stream(model_input, output_type=None, **kwargs)`

`LlamaCpp`

`init(model, chat_mode=True)`

`generate(model_input, output_type=None, **inference_kwargs)`

`generate_stream(model_input, output_type=None, **inference_kwargs)`

`MLXLM`

`init(model, tokenizer)`

`generate(model_input, output_type=None, **kwargs)`

`generate_batch(model_input, output_type=None, **kwargs)`

`generate_stream(model_input, output_type=None, **kwargs)`

`Mistral`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **inference_kwargs)`

`generate_stream(model_input, output_type=None, **inference_kwargs)`

`Model`

`call(model_input, output_type=None, backend=None, **inference_kwargs)`

`batch(model_input, output_type=None, backend=None, **inference_kwargs)`

`generate(model_input, output_type=None, **inference_kwargs)` `abstractmethod`

`generate_batch(model_input, output_type=None, **inference_kwargs)` `abstractmethod`

`generate_stream(model_input, output_type=None, **inference_kwargs)` `abstractmethod`

`stream(model_input, output_type=None, backend=None, **inference_kwargs)`

`ModelTypeAdapter`

`format_input(model_input)` `abstractmethod`

`format_output_type(output_type=None)` `abstractmethod`

`Ollama`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **kwargs)`

`generate_stream(model_input, output_type=None, **kwargs)`

`OpenAI`

`init(client, model_name=None)`

`generate(model_input, output_type=None, **inference_kwargs)`

`generate_stream(model_input, output_type=None, **inference_kwargs)`

`SGLang`

`init(client, model_name=None)`