Skip to content

outlines

Outlines is a Generative Model Programming Framework.

Anthropic

Bases: Model

Thin wrapper around the anthropic.Anthropic client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the anthropic.Anthropic client.

Source code in outlines/models/anthropic.py
class Anthropic(Model):
    """Thin wrapper around the `anthropic.Anthropic` client.

    Translates the high-level input and output types supplied by the user
    into the arguments expected by the `anthropic.Anthropic` client.

    """
    def __init__(
        self, client: "AnthropicClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            An `anthropic.Anthropic` client.
        model_name
            The name of the model to use.

        """
        self.model_name = model_name
        self.client = client
        self.type_adapter = AnthropicTypeAdapter()

    def _apply_model_default(self, inference_kwargs: dict) -> None:
        """Fill in the default model name when the caller gave none."""
        if self.model_name is not None:
            inference_kwargs.setdefault("model", self.model_name)

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt from which the model generates a response.
        output_type
            Must be `None`: structured generation is not supported by
            Anthropic, and any other value raises an error at runtime.
        **inference_kwargs
            Additional keyword arguments forwarded to the client.

        Returns
        -------
        str
            The model's response.

        """
        request_body = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        self._apply_model_default(inference_kwargs)

        response = self.client.messages.create(
            **request_body,
            **inference_kwargs,
        )
        return response.content[0].text

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Unsupported: Anthropic offers no batch generation endpoint."""
        raise NotImplementedError(
            "Anthropic does not support batch generation."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt from which the model generates a response.
        output_type
            Must be `None`: structured generation is not supported by
            Anthropic, and any other value raises an error at runtime.
        **inference_kwargs
            Additional keyword arguments forwarded to the client.

        Returns
        -------
        Iterator[str]
            An iterator over the generated text fragments.

        """
        request_body = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        self._apply_model_default(inference_kwargs)

        events = self.client.messages.create(
            **request_body,
            stream=True,
            **inference_kwargs,
        )
        for event in events:
            is_text_delta = (
                event.type == "content_block_delta"
                and event.delta.type == "text_delta"
            )
            if is_text_delta:
                yield event.delta.text

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Anthropic

An anthropic.Anthropic client.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/anthropic.py
def __init__(
    self, client: "AnthropicClient", model_name: Optional[str] = None
):
    """Store the client, default model name, and type adapter.

    Parameters
    ----------
    client
        An `anthropic.Anthropic` client.
    model_name
        The name of the model to use.

    """
    self.model_name = model_name
    self.client = client
    self.type_adapter = AnthropicTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using Anthropic.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

As structured generation is not supported by Anthropic, the value of this argument must be None. Otherwise, an error will be raised at runtime.

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The response generated by the model.

Source code in outlines/models/anthropic.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text with the Anthropic API.

    Parameters
    ----------
    model_input
        The prompt from which the model generates a response.
    output_type
        Must be `None`: structured generation is not supported by
        Anthropic, and any other value raises an error at runtime.
    **inference_kwargs
        Additional keyword arguments forwarded to the client.

    Returns
    -------
    str
        The model's response.

    """
    request_body = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if self.model_name is not None:
        inference_kwargs.setdefault("model", self.model_name)

    response = self.client.messages.create(
        **request_body,
        **inference_kwargs,
    )
    return response.content[0].text

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using Anthropic.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

As structured generation is not supported by Anthropic, the value of this argument must be None. Otherwise, an error will be raised at runtime.

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/anthropic.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text from the Anthropic API.

    Parameters
    ----------
    model_input
        The prompt from which the model generates a response.
    output_type
        Must be `None`: structured generation is not supported by
        Anthropic, and any other value raises an error at runtime.
    **inference_kwargs
        Additional keyword arguments forwarded to the client.

    Returns
    -------
    Iterator[str]
        An iterator over the generated text fragments.

    """
    request_body = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    if self.model_name is not None:
        inference_kwargs.setdefault("model", self.model_name)

    events = self.client.messages.create(
        **request_body,
        stream=True,
        **inference_kwargs,
    )
    for event in events:
        is_text_delta = (
            event.type == "content_block_delta"
            and event.delta.type == "text_delta"
        )
        if is_text_delta:
            yield event.delta.text

AsyncLMStudio

Bases: AsyncModel

Thin wrapper around a lmstudio.AsyncClient client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the LMStudio async client.

Source code in outlines/models/lmstudio.py
class AsyncLMStudio(AsyncModel):
    """Thin wrapper around a `lmstudio.AsyncClient` client.

    Translates the high-level input and output types supplied by the user
    into arguments for the LMStudio async client.

    """

    def __init__(
        self, client: "AsyncClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            An LMStudio `AsyncClient` instance.
        model_name
            The name of the model to use. If not provided, uses the default
            loaded model in LMStudio.

        """
        self.model_name = model_name
        self.client = client
        self.type_adapter = LMStudioTypeAdapter()
        # True once we have entered the client's async context ourselves.
        self._context_entered = False

    async def _enter_context(self) -> None:
        """Enter the client's async context on first use."""
        if not self._context_entered:
            await self.client.__aenter__()
            self._context_entered = True

    async def _resolve_model(self, kwargs: dict):
        """Pop the model key from `kwargs` and fetch the matching handle.

        Falls back to `self.model_name`, then to LMStudio's currently
        loaded default model.
        """
        model_key = kwargs.pop("model", self.model_name)
        if model_key:
            return await self.client.llm.model(model_key)
        return await self.client.llm.model()

    async def close(self) -> None:
        """Close the async client and release resources."""
        if not self._context_entered:
            return
        await self.client.__aexit__(None, None, None)
        self._context_entered = False

    async def generate(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using LMStudio asynchronously.

        Parameters
        ----------
        model_input
            The prompt from which the model generates a response.
        output_type
            The desired response format; must be convertible to a JSON
            schema.
        **kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        str
            The generated text.

        """
        await self._enter_context()
        model = await self._resolve_model(kwargs)

        prompt = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)
        if response_format is not None:
            kwargs["response_format"] = response_format

        result = await model.respond(prompt, **kwargs)
        return result.content

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **kwargs,
    ):
        """Unsupported: the `lmstudio` library has no batch inference."""
        raise NotImplementedError(
            "The `lmstudio` library does not support batch inference."
        )

    async def generate_stream(  # type: ignore
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using LMStudio asynchronously.

        Parameters
        ----------
        model_input
            The prompt from which the model generates a response.
        output_type
            The desired response format; must be convertible to a JSON
            schema.
        **kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        AsyncIterator[str]
            An async iterator over the generated text fragments.

        """
        await self._enter_context()
        model = await self._resolve_model(kwargs)

        prompt = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)
        if response_format is not None:
            kwargs["response_format"] = response_format

        stream = await model.respond_stream(prompt, **kwargs)
        async for fragment in stream:
            yield fragment.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client AsyncClient

An LMStudio `AsyncClient` instance.

required
model_name Optional[str]

The name of the model to use. If not provided, uses the default loaded model in LMStudio.

None
Source code in outlines/models/lmstudio.py
def __init__(
    self, client: "AsyncClient", model_name: Optional[str] = None
):
    """Store the client, default model name, and type adapter.

    Parameters
    ----------
    client
        An LMStudio `AsyncClient` instance.
    model_name
        The name of the model to use. Defaults to the model currently
        loaded in LMStudio when not provided.

    """
    self.model_name = model_name
    self.client = client
    self.type_adapter = LMStudioTypeAdapter()
    # True once the client's async context has been entered.
    self._context_entered = False

close() async

Close the async client and release resources.

Source code in outlines/models/lmstudio.py
async def close(self) -> None:
    """Exit the client's async context, if we previously entered it."""
    if not self._context_entered:
        return
    await self.client.__aexit__(None, None, None)
    self._context_entered = False

generate(model_input, output_type=None, **kwargs) async

Generate text using LMStudio asynchronously.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/lmstudio.py
async def generate(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using LMStudio asynchronously.

    Parameters
    ----------
    model_input
        The prompt from which the model generates a response.
    output_type
        The desired response format; must be convertible to a JSON
        schema.
    **kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    str
        The generated text.

    """
    if not self._context_entered:
        await self.client.__aenter__()
        self._context_entered = True

    # Caller-supplied "model" wins; otherwise fall back to the configured
    # name, then to LMStudio's currently loaded default model.
    model_key = kwargs.pop("model", self.model_name)
    if model_key:
        model = await self.client.llm.model(model_key)
    else:
        model = await self.client.llm.model()

    prompt = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)
    if response_format is not None:
        kwargs["response_format"] = response_format

    result = await model.respond(prompt, **kwargs)
    return result.content

generate_stream(model_input, output_type=None, **kwargs) async

Stream text using LMStudio asynchronously.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/lmstudio.py
async def generate_stream(  # type: ignore
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using LMStudio asynchronously.

    Parameters
    ----------
    model_input
        The prompt from which the model generates a response.
    output_type
        The desired response format; must be convertible to a JSON
        schema.
    **kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    AsyncIterator[str]
        An async iterator over the generated text fragments.

    """
    if not self._context_entered:
        await self.client.__aenter__()
        self._context_entered = True

    # Caller-supplied "model" wins; otherwise fall back to the configured
    # name, then to LMStudio's currently loaded default model.
    model_key = kwargs.pop("model", self.model_name)
    if model_key:
        model = await self.client.llm.model(model_key)
    else:
        model = await self.client.llm.model()

    prompt = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)
    if response_format is not None:
        kwargs["response_format"] = response_format

    stream = await model.respond_stream(prompt, **kwargs)
    async for fragment in stream:
        yield fragment.content

AsyncMistral

Bases: AsyncModel

Async thin wrapper around the mistralai.Mistral client.

Converts input and output types to arguments for the mistralai.Mistral client's async methods (chat.complete_async or chat.stream_async).

Source code in outlines/models/mistral.py
class AsyncMistral(AsyncModel):
    """Async thin wrapper around the `mistralai.Mistral` client.

    Converts input and output types to arguments for the `mistralai.Mistral`
    client's async methods (`chat.complete_async` or `chat.stream_async`).

    """

    def __init__(
        self, client: "MistralClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client : MistralClient
            A mistralai.Mistral client instance.
        model_name : Optional[str]
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = MistralTypeAdapter()

    @staticmethod
    def _wrap_client_error(e: Exception) -> Exception:
        """Map a client exception to the error type surfaced to callers.

        Schema-related failures become `TypeError`, everything else becomes
        `RuntimeError`. The caller must raise the returned exception with
        `raise ... from e` so the original traceback is chained.
        """
        # "schema" also matches "json_schema" in the error message.
        if "schema" in str(e).lower():
            return TypeError(
                f"Mistral does not support your schema: {e}. "
                "Try a local model or dottxt instead."
            )
        return RuntimeError(f"Mistral API error: {e}")

    async def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate a response from the model asynchronously.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs : Any
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The response generated by the model as text.

        Raises
        ------
        TypeError
            If the client rejects the requested schema.
        RuntimeError
            For any other client error.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            result = await self.client.chat.complete_async(
                messages=messages,
                response_format=response_format,
                stream=False,
                **inference_kwargs,
            )
        except Exception as e:
            # Chain the original error in both branches so the full
            # traceback is preserved (previously only the RuntimeError
            # branch used `from e`).
            raise self._wrap_client_error(e) from e

        outputs = [choice.message for choice in result.choices]

        # A single choice is unwrapped to a plain string for convenience.
        if len(outputs) == 1:
            return outputs[0].content
        return [m.content for m in outputs]

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        """Unsupported: the mistralai library has no batch inference."""
        raise NotImplementedError(
            "The mistralai library does not support batch inference."
        )

    async def generate_stream(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        """Generate text from the model as an async stream of chunks.

        Parameters
        ----------
        model_input
            str, list, or chat input to generate from.
        output_type
            Optional type for structured output.
        **inference_kwargs
            Extra kwargs like "model" name.

        Yields
        ------
        str
            Chunks of text as they are streamed.

        Raises
        ------
        TypeError
            If the client rejects the requested schema.
        RuntimeError
            For any other client error.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            response = await self.client.chat.stream_async(
                messages=messages,
                response_format=response_format,
                **inference_kwargs
            )
        except Exception as e:
            # Chain the original error so the full traceback is preserved.
            raise self._wrap_client_error(e) from e

        async for chunk in response:
            # Skip keep-alive/metadata chunks that carry no text content.
            if (
                hasattr(chunk, "data")
                and chunk.data.choices
                and len(chunk.data.choices) > 0
                and hasattr(chunk.data.choices[0], "delta")
                and chunk.data.choices[0].delta.content is not None
            ):
                yield chunk.data.choices[0].delta.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Mistral

A mistralai.Mistral client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/mistral.py
def __init__(
    self, client: "MistralClient", model_name: Optional[str] = None
):
    """Store the client, default model name, and type adapter.

    Parameters
    ----------
    client : MistralClient
        A mistralai.Mistral client instance.
    model_name : Optional[str]
        The name of the model to use.

    """
    self.model_name = model_name
    self.client = client
    self.type_adapter = MistralTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate a response from the model asynchronously.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt or chat messages to generate a response from.

required
output_type Optional[Any]

The desired format of the response (e.g., JSON schema).

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The response generated by the model as text.

Source code in outlines/models/mistral.py
async def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate a response from the model asynchronously.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs : Any
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The response generated by the model as text.

    Raises
    ------
    TypeError
        If the client rejects the requested schema.
    RuntimeError
        For any other client error.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        result = await self.client.chat.complete_async(
            messages=messages,
            response_format=response_format,
            stream=False,
            **inference_kwargs,
        )
    except Exception as e:
        # Chain the original client error in both branches so the full
        # traceback is preserved (previously only the RuntimeError branch
        # used `from e`). Note "schema" also matches "json_schema".
        if "schema" in str(e).lower():
            raise TypeError(
                f"Mistral does not support your schema: {e}. "
                "Try a local model or dottxt instead."
            ) from e
        raise RuntimeError(f"Mistral API error: {e}") from e

    outputs = [choice.message for choice in result.choices]

    # A single choice is unwrapped to a plain string for convenience.
    if len(outputs) == 1:
        return outputs[0].content
    return [m.content for m in outputs]

generate_stream(model_input, output_type=None, **inference_kwargs) async

Generate text from the model as an async stream of chunks.

Parameters:

Name Type Description Default
model_input

str, list, or chat input to generate from.

required
output_type

Optional type for structured output.

None
**inference_kwargs

Extra kwargs like "model" name.

{}

Yields:

Type Description
str

Chunks of text as they are streamed.

Source code in outlines/models/mistral.py
async def generate_stream(
    self,
    model_input,
    output_type=None,
    **inference_kwargs,
):
    """Generate text from the model as an async stream of chunks.

    Parameters
    ----------
    model_input
        str, list, or chat input to generate from.
    output_type
        Optional type for structured output.
    **inference_kwargs
        Extra kwargs like "model" name.

    Yields
    ------
    str
        Chunks of text as they are streamed.

    Raises
    ------
    TypeError
        If the client rejects the requested schema.
    RuntimeError
        For any other client error.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        response = await self.client.chat.stream_async(
            messages=messages,
            response_format=response_format,
            **inference_kwargs
        )
    except Exception as e:
        # Chain the original client error in both branches so the full
        # traceback is preserved (previously only the RuntimeError branch
        # used `from e`). Note "schema" also matches "json_schema".
        if "schema" in str(e).lower():
            raise TypeError(
                f"Mistral does not support your schema: {e}. "
                "Try a local model or dottxt instead."
            ) from e
        raise RuntimeError(f"Mistral API error: {e}") from e

    async for chunk in response:
        # Skip keep-alive/metadata chunks that carry no text content.
        if (
            hasattr(chunk, "data")
            and chunk.data.choices
            and len(chunk.data.choices) > 0
            and hasattr(chunk.data.choices[0], "delta")
            and chunk.data.choices[0].delta.content is not None
        ):
            yield chunk.data.choices[0].delta.content

AsyncOllama

Bases: AsyncModel

Thin wrapper around the ollama.AsyncClient client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.AsyncClient client.

Source code in outlines/models/ollama.py
class AsyncOllama(AsyncModel):
    """Thin wrapper around the `ollama.AsyncClient` client.

    Translates the high-level input and output types supplied by the user
    into arguments for the `ollama.AsyncClient` client.

    """

    def __init__(
        self, client: "AsyncClient", model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `ollama.AsyncClient` client.
        model_name
            The name of the model to use.

        """
        self.model_name = model_name
        self.client = client
        self.type_adapter = OllamaTypeAdapter()

    async def generate(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt from which the model generates a response.
        output_type
            The desired response format; must be convertible to a JSON
            schema.
        **kwargs
            Additional keyword arguments forwarded to the client.

        Returns
        -------
        str
            The generated text.

        """
        if self.model_name is not None:
            kwargs.setdefault("model", self.model_name)

        formatted_input = self.type_adapter.format_input(model_input)
        output_format = self.type_adapter.format_output_type(output_type)
        response = await self.client.chat(
            messages=formatted_input,
            format=output_format,
            **kwargs,
        )
        return response.message.content

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **kwargs,
    ):
        """Unsupported: the `ollama` library has no batch inference."""
        raise NotImplementedError(
            "The `ollama` library does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt from which the model generates a response.
        output_type
            The desired response format; must be convertible to a JSON
            schema.
        **kwargs
            Additional keyword arguments forwarded to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator over the generated text fragments.

        """
        if self.model_name is not None:
            kwargs.setdefault("model", self.model_name)

        formatted_input = self.type_adapter.format_input(model_input)
        output_format = self.type_adapter.format_output_type(output_type)
        stream = await self.client.chat(
            messages=formatted_input,
            format=output_format,
            stream=True,
            **kwargs,
        )
        async for part in stream:
            yield part.message.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client AsyncClient

The ollama.AsyncClient client.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/ollama.py
def __init__(
    self, client: "AsyncClient", model_name: Optional[str] = None,
):
    """Store the client, default model name, and type adapter.

    Parameters
    ----------
    client
        The `ollama.AsyncClient` client.
    model_name
        The name of the model to use.

    """
    self.model_name = model_name
    self.client = client
    self.type_adapter = OllamaTypeAdapter()

generate(model_input, output_type=None, **kwargs) async

Generate text using Ollama.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/ollama.py
async def generate(self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate a single completion with Ollama.

    Parameters
    ----------
    model_input
        The prompt from which the model generates a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments forwarded to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    # Fall back to the default model name unless the caller chose one.
    if self.model_name is not None and "model" not in kwargs:
        kwargs["model"] = self.model_name

    formatted_messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    response = await self.client.chat(
        messages=formatted_messages,
        format=response_format,
        **kwargs,
    )

    return response.message.content

generate_stream(model_input, output_type=None, **kwargs) async

Stream text using Ollama.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py
async def generate_stream( # type: ignore
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Stream a completion from Ollama chunk by chunk.

    Parameters
    ----------
    model_input
        The prompt from which the model generates a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments forwarded to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    # Fall back to the default model name unless the caller chose one.
    if self.model_name is not None and "model" not in kwargs:
        kwargs["model"] = self.model_name

    formatted_messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    stream = await self.client.chat(
        messages=formatted_messages,
        format=response_format,
        stream=True,
        **kwargs,
    )
    async for chunk in stream:
        yield chunk.message.content

AsyncOpenAI

Bases: AsyncModel

Thin wrapper around the openai.AsyncOpenAI client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.AsyncOpenAI client.

Source code in outlines/models/openai.py
class AsyncOpenAI(AsyncModel):
    """Thin wrapper around the `openai.AsyncOpenAI` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.AsyncOpenAI` client.

    """

    def __init__(
        self,
        client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"],
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OpenAITypeAdapter()

    @staticmethod
    def _raise_if_invalid_schema(e: Exception) -> None:
        """Translate an "Invalid schema" `BadRequestError` into a `TypeError`.

        Does nothing for any other error so the caller can re-raise it.
        """
        if e.body["message"].startswith("Invalid schema"):
            # `from e` keeps the original error as __cause__ for debugging.
            raise TypeError(
                f"OpenAI does not support your schema: {e.body['message']}. "
                "Try a local model or dottxt instead."
            ) from e

    async def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model. A list is returned when the
            request yields several choices.

        Raises
        ------
        TypeError
            If OpenAI rejects the JSON schema derived from `output_type`.
        ValueError
            If the model refuses to answer the request.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        # Fall back to the default model name when the caller does not
        # specify one explicitly.
        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            result = await self.client.chat.completions.create(
                messages=messages,
                **response_format,
                **inference_kwargs,
            )
        except openai.BadRequestError as e:
            self._raise_if_invalid_schema(e)
            raise  # bare raise preserves the original traceback

        # One message per requested completion choice.
        choice_messages = [choice.message for choice in result.choices]
        for message in choice_messages:
            if message.refusal is not None:
                raise ValueError(
                    f"OpenAI refused to answer the request: {message.refusal}"
                )

        if len(choice_messages) == 1:
            return choice_messages[0].content
        else:
            return [message.content for message in choice_messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Not supported: the `openai` library has no batch inference API."""
        raise NotImplementedError(
            "The `openai` library does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs,
    ) -> AsyncIterator[str]:
        """Stream text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        Raises
        ------
        TypeError
            If OpenAI rejects the JSON schema derived from `output_type`.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            stream = await self.client.chat.completions.create(
                stream=True,
                messages=messages,
                **response_format,
                **inference_kwargs
            )
        except openai.BadRequestError as e:
            self._raise_if_invalid_schema(e)
            raise  # bare raise preserves the original traceback

        async for chunk in stream:
            # Skip chunks that carry no content delta.
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Union[AsyncOpenAI, AsyncAzureOpenAI]

The openai.AsyncOpenAI or openai.AsyncAzureOpenAI client.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/openai.py
def __init__(
    self,
    client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"],
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.
    model_name
        The name of the model to use.

    """
    # Keep the raw client plus an adapter that translates Outlines-level
    # inputs/output types into OpenAI chat-completion arguments.
    self.type_adapter = OpenAITypeAdapter()
    self.model_name = model_name
    self.client = client

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using OpenAI.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Union[type[BaseModel], str]]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/openai.py
async def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model. A list is returned when the
        request yields several choices.

    Raises
    ------
    TypeError
        If OpenAI rejects the JSON schema derived from `output_type`.
    ValueError
        If the model refuses to answer the request.

    """
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    # Fall back to the default model name when the caller does not
    # specify one explicitly.
    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        result = await self.client.chat.completions.create(
            messages=messages,
            **response_format,
            **inference_kwargs,
        )
    except openai.BadRequestError as e:
        if e.body["message"].startswith("Invalid schema"):
            # `from e` keeps the original error as __cause__ for debugging.
            raise TypeError(
                f"OpenAI does not support your schema: {e.body['message']}. "
                "Try a local model or dottxt instead."
            ) from e
        raise  # bare raise preserves the original traceback

    # Use a distinct name: `messages` above holds the *input* messages.
    choice_messages = [choice.message for choice in result.choices]
    for message in choice_messages:
        if message.refusal is not None:
            raise ValueError(
                f"OpenAI refused to answer the request: {message.refusal}"
            )

    if len(choice_messages) == 1:
        return choice_messages[0].content
    else:
        return [message.content for message in choice_messages]

generate_stream(model_input, output_type=None, **inference_kwargs) async

Stream text using OpenAI.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Union[type[BaseModel], str]]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.

None
**inference_kwargs

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/openai.py
async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs,
) -> AsyncIterator[str]:
    """Stream text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    Raises
    ------
    TypeError
        If OpenAI rejects the JSON schema derived from `output_type`.

    """
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    # Fall back to the default model name when the caller does not
    # specify one explicitly.
    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        stream = await self.client.chat.completions.create(
            stream=True,
            messages=messages,
            **response_format,
            **inference_kwargs
        )
    except openai.BadRequestError as e:
        if e.body["message"].startswith("Invalid schema"):
            # `from e` keeps the original error as __cause__ for debugging.
            raise TypeError(
                f"OpenAI does not support your schema: {e.body['message']}. "
                "Try a local model or dottxt instead."
            ) from e
        raise  # bare raise preserves the original traceback

    async for chunk in stream:
        # Skip chunks that carry no content delta.
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

AsyncSGLang

Bases: AsyncModel

Thin async wrapper around the openai.AsyncOpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py
class AsyncSGLang(AsyncModel):
    """Thin async wrapper around the `openai.AsyncOpenAI` client used to
    communicate with an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.AsyncOpenAI` client for
    the SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using `sglang`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model. A list is returned when the
            request yields several choices.

        Raises
        ------
        ValueError
            If the server refuses to answer the request.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        response = await self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The sglang server refused to answer the request: "
                    f"{message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Not supported by SGLang."""
        raise NotImplementedError(
            "SGLang does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Return a text generator.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )

        async for chunk in stream:  # pragma: no cover
            # Skip chunks that carry no content delta.
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        inference_kwargs.update(output_type_args)

        # Fall back to the default model name when the caller does not
        # specify one explicitly.
        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }

        return client_args

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client

An openai.AsyncOpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None

Parameters:

Name Type Description Default
client

An openai.AsyncOpenAI client instance.

required
Source code in outlines/models/sglang.py
def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.
    model_name
        The name of the model to use.

    """
    # Store the client and default model name; the type adapter converts
    # Outlines-level inputs/output types into SGLang client arguments.
    self.client = client
    self.model_name = model_name
    self.type_adapter = SGLangTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using sglang.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/sglang.py
async def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using `sglang`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response = await self.client.chat.completions.create(**client_args)

    # Collect each choice's content, refusing to proceed on a refusal.
    contents = []
    for choice in response.choices:
        message = choice.message
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The sglang server refused to answer the request: "
                f"{message.refusal}"
            )
        contents.append(message.content)

    # A single choice unwraps to a plain string.
    return contents[0] if len(contents) == 1 else contents

generate_stream(model_input, output_type=None, **inference_kwargs) async

Return a text generator.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py
async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Return a text generator.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = await self.client.chat.completions.create(
        **client_args,
        stream=True,
    )

    async for chunk in stream:  # pragma: no cover
        choices = chunk.choices
        if not choices:
            # Some chunks carry no choices at all; skip them.
            continue
        piece = choices[0].delta.content
        if piece is not None:
            yield piece

AsyncTGI

Bases: AsyncModel

Thin async wrapper around a huggingface_hub.AsyncInferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.AsyncInferenceClient client.

Source code in outlines/models/tgi.py
class AsyncTGI(AsyncModel):
    """Thin async wrapper around a `huggingface_hub.AsyncInferenceClient`
    client used to communicate with a `TGI` server.

    The wrapper translates the input and output types specified by the
    users at a higher level into arguments to the
    `huggingface_hub.AsyncInferenceClient` client.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `AsyncInferenceClient` client instance.

        """
        # The adapter converts Outlines-level inputs/output types into
        # keyword arguments for the TGI client.
        self.client = client
        self.type_adapter = TGITypeAdapter()

    async def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server
            uses a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        return await self.client.text_generation(
            **self._build_client_args(
                model_input, output_type, **inference_kwargs,
            )
        )

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Not supported by TGI."""
        raise NotImplementedError("TGI does not support batch inference.")

    async def generate_stream( # type: ignore
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server
            uses a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        stream = await self.client.text_generation(
            stream=True,
            **self._build_client_args(
                model_input, output_type, **inference_kwargs,
            ),
        )

        # Forward each raw text chunk exactly as the server sends it.
        async for chunk in stream:  # pragma: no cover
            yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Assemble the keyword arguments for the TGI client call."""
        inference_kwargs.update(
            self.type_adapter.format_output_type(output_type)
        )
        return {
            "prompt": self.type_adapter.format_input(model_input),
            **inference_kwargs,
        }

__init__(client)

Parameters:

Name Type Description Default
client

A huggingface AsyncInferenceClient client instance.

required
Source code in outlines/models/tgi.py
def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `AsyncInferenceClient` client instance.

    """
    # The adapter converts Outlines-level inputs/output types into
    # keyword arguments for the TGI client.
    self.type_adapter = TGITypeAdapter()
    self.client = client

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/tgi.py
async def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    # The text-generation endpoint returns the completed text directly.
    return await self.client.text_generation(**client_args)

generate_stream(model_input, output_type=None, **inference_kwargs) async

Stream text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py
async def generate_stream( # type: ignore
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    stream = await self.client.text_generation(
        **client_args, stream=True
    )

    # Forward each raw text chunk exactly as the server sends it.
    async for chunk in stream:  # pragma: no cover
        yield chunk

AsyncVLLM

Bases: AsyncModel

Thin async wrapper around the openai.AsyncOpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the vllm server.

Source code in outlines/models/vllm.py
class AsyncVLLM(AsyncModel):
    """Thin async wrapper around the `openai.OpenAI` client used to communicate
    with a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    `vllm` server.
    """

    def __init__(
        self,
        client: "AsyncOpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.
        model_name
            The name of the model to use. When provided, it is used as the
            default `model` argument for requests that do not specify one.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model. A plain string when the server
            returns a single choice, a list of strings otherwise.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        response = await self.client.chat.completions.create(**client_args)

        # Raise an explicit error when the server refused the request
        # instead of silently returning `None` content.
        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}"
                )

        # One choice -> plain string; several choices -> list of strings.
        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        # Batch inference is not available through the vLLM server API.
        raise NotImplementedError("VLLM does not support batch inference.")

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.
        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )

        # Only yield chunks that actually carry new content; chunks with no
        # choices or a `None` delta are skipped.
        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        # Structured-output arguments are merged into `extra_body`, on top of
        # any `extra_body` the caller supplied.
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        # Fall back to the instance-level model name when the caller did not
        # pass one explicitly.
        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client AsyncOpenAI

An openai.AsyncOpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/vllm.py
def __init__(
    self,
    client: "AsyncOpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.
    model_name
        The name of the model to use. When provided, it is used as the
        default `model` argument for requests that do not specify one.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using vLLM.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/vllm.py
async def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text through a vLLM server.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    request_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    completion = await self.client.chat.completions.create(**request_args)

    # Fail loudly if the server refused any of the choices.
    choice_messages = [choice.message for choice in completion.choices]
    for choice_message in choice_messages:
        if choice_message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The vLLM server refused to answer the request: "
                f"{choice_message.refusal}"
            )

    # A single choice is unwrapped to a plain string.
    if len(choice_messages) == 1:
        return choice_messages[0].content
    return [choice_message.content for choice_message in choice_messages]

generate_stream(model_input, output_type=None, **inference_kwargs) async

Stream text using vLLM.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py
async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text generated through a vLLM server.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.
    """
    request_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response_stream = await self.client.chat.completions.create(
        stream=True,
        **request_args,
    )

    # Skip keep-alive chunks with no choices and deltas without content.
    async for event in response_stream:  # pragma: no cover
        if not event.choices:
            continue
        delta_text = event.choices[0].delta.content
        if delta_text is not None:
            yield delta_text

Dottxt

Bases: Model

Thin wrapper around the dottxt.client.Dottxt client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the dottxt.client.Dottxt client.

Source code in outlines/models/dottxt.py
class Dottxt(Model):
    """Thin wrapper around the `dottxt.client.Dottxt` client.

    This wrapper translates the high-level input and output types used in
    Outlines into the arguments expected by the `dottxt.client.Dottxt`
    client.

    """

    def __init__(
        self,
        client: "DottxtClient",
        model_name: Optional[str] = None,
        model_revision: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            A `dottxt.Dottxt` client.
        model_name
            The name of the model to use.
        model_revision
            The revision of the model to use.

        """
        self.type_adapter = DottxtTypeAdapter()
        self.client = client
        self.model_name = model_name
        self.model_revision = model_revision

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Dottxt.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        prompt = self.type_adapter.format_input(model_input)
        json_schema = self.type_adapter.format_output_type(output_type)

        # Fill in the instance-level defaults unless the caller already
        # provided a value for this particular request.
        for key, default in (
            ("model_name", self.model_name),
            ("model_revision", self.model_revision),
        ):
            if key not in inference_kwargs and default is not None:
                inference_kwargs[key] = default

        completion = self.client.json(
            prompt,
            json_schema,
            **inference_kwargs,
        )
        return completion.data

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        # Batch generation is not part of the Dottxt API.
        raise NotImplementedError(
            "Dottxt does not support batch generation."
        )

    def generate_stream(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        """Not available for Dottxt."""
        message = (
            "Dottxt does not support streaming. Call the model/generator for "
            "regular generation instead."
        )
        raise NotImplementedError(message)

__init__(client, model_name=None, model_revision=None)

Parameters:

Name Type Description Default
client Dottxt

A dottxt.Dottxt client.

required
model_name Optional[str]

The name of the model to use.

None
model_revision Optional[str]

The revision of the model to use.

None
Source code in outlines/models/dottxt.py
def __init__(
    self,
    client: "DottxtClient",
    model_name: Optional[str] = None,
    model_revision: Optional[str] = None,
):
    """Initialize the Dottxt model wrapper.

    Parameters
    ----------
    client
        A `dottxt.Dottxt` client.
    model_name
        The name of the model to use.
    model_revision
        The revision of the model to use.

    """
    # Assignments are independent; the adapter is created first so the
    # instance is usable for type conversion as soon as it exists.
    self.type_adapter = DottxtTypeAdapter()
    self.client = client
    self.model_name = model_name
    self.model_revision = model_revision

generate(model_input, output_type=None, **inference_kwargs)

Generate text using Dottxt.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/dottxt.py
def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Dottxt.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    prompt = self.type_adapter.format_input(model_input)
    json_schema = self.type_adapter.format_output_type(output_type)

    # Fill in the instance-level defaults unless the caller already
    # provided a value for this particular request.
    for key, default in (
        ("model_name", self.model_name),
        ("model_revision", self.model_revision),
    ):
        if key not in inference_kwargs and default is not None:
            inference_kwargs[key] = default

    completion = self.client.json(
        prompt,
        json_schema,
        **inference_kwargs,
    )
    return completion.data

generate_stream(model_input, output_type=None, **inference_kwargs)

Not available for Dottxt.

Source code in outlines/models/dottxt.py
def generate_stream(
    self,
    model_input,
    output_type=None,
    **inference_kwargs,
):
    """Not available for Dottxt."""
    # The Dottxt API has no streaming endpoint; point the user at the
    # regular generation path instead.
    message = (
        "Dottxt does not support streaming. Call the model/generator for "
        "regular generation instead."
    )
    raise NotImplementedError(message)

Gemini

Bases: Model

Thin wrapper around the google.genai.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the google.genai.Client client.

Source code in outlines/models/gemini.py
class Gemini(Model):
    """Thin wrapper around the `google.genai.Client` client.

    This wrapper is used to convert the input and output types specified by
    the users at a higher level to arguments to the `google.genai.Client`
    client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            A `google.genai.Client` instance.
        model_name
            The name of the model to use. Used as the default `model`
            argument when the caller does not pass one at inference time.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = GeminiTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> str:
        """Generate a response from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        # `pop("model", ...)` runs before the `config` dict is built (keyword
        # arguments are evaluated left to right), so a caller-provided
        # `model` is removed from `inference_kwargs` and never leaks into
        # `config`.
        completion = self.client.models.generate_content(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs}
        )

        return completion.text

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        # Batch generation is not available through this client.
        raise NotImplementedError(
            "Gemini does not support batch generation."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Generate a stream of responses from the model.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema, a list of such types, or a multiple choice type.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        contents = self.type_adapter.format_input(model_input)
        generation_config = self.type_adapter.format_output_type(output_type)

        # Same `pop`-before-`config` ordering as in `generate` above.
        stream = self.client.models.generate_content_stream(
            **contents,
            model=inference_kwargs.pop("model", self.model_name),
            config={**generation_config, **inference_kwargs},
        )

        # Only yield chunks that carry non-empty text.
        for chunk in stream:
            if hasattr(chunk, "text") and chunk.text:
                yield chunk.text

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Client

A google.genai.Client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/gemini.py
def __init__(self, client: "Client", model_name: Optional[str] = None):
    """Initialize the Gemini model wrapper.

    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    """
    # Assignments are independent of one another; order is arbitrary.
    self.type_adapter = GeminiTypeAdapter()
    self.client = client
    self.model_name = model_name

generate(model_input, output_type=None, **inference_kwargs)

Generate a response from the model.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type.

None
**inference_kwargs

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The response generated by the model.

Source code in outlines/models/gemini.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> str:
    """Generate a response from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    output_config = self.type_adapter.format_output_type(output_type)

    # Pull `model` out of the kwargs first so it cannot leak into the
    # merged `config` dict below.
    model_name = inference_kwargs.pop("model", self.model_name)
    merged_config = {**output_config, **inference_kwargs}

    result = self.client.models.generate_content(
        **contents,
        model=model_name,
        config=merged_config,
    )

    return result.text

generate_stream(model_input, output_type=None, **inference_kwargs)

Generate a stream of responses from the model.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type.

None
**inference_kwargs

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/gemini.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Generate a stream of responses from the model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema, a list of such types, or a multiple choice type.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    contents = self.type_adapter.format_input(model_input)
    output_config = self.type_adapter.format_output_type(output_type)

    # Pull `model` out of the kwargs first so it cannot leak into the
    # merged `config` dict below.
    model_name = inference_kwargs.pop("model", self.model_name)
    merged_config = {**output_config, **inference_kwargs}

    stream = self.client.models.generate_content_stream(
        **contents,
        model=model_name,
        config=merged_config,
    )

    # Only yield chunks that carry non-empty text.
    for chunk in stream:
        text = getattr(chunk, "text", None)
        if text:
            yield text

LMStudio

Bases: Model

Thin wrapper around a lmstudio.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the LMStudio client.

Source code in outlines/models/lmstudio.py
class LMStudio(Model):
    """Thin wrapper around a `lmstudio.Client` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the LMStudio client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            A LMStudio Client instance obtained via `lmstudio.Client()` or
            `lmstudio.get_default_client()`.
        model_name
            The name of the model to use. If not provided, uses the default
            loaded model in LMStudio.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = LMStudioTypeAdapter()

    def _prepare_call(self, model_input, output_type, kwargs):
        """Resolve the model handle and call arguments for a request.

        Shared by `generate` and `generate_stream`, which previously
        duplicated this logic verbatim.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model.
        kwargs
            The caller's keyword arguments; mutated in place (the `model`
            key is removed, `response_format` may be added).

        Returns
        -------
        tuple
            A `(model, formatted_input, kwargs)` triple ready to be passed
            to `model.respond` / `model.respond_stream`.

        """
        # Fall back to the instance default model only when the caller did
        # not pass `model` at all (an explicit `model=None` still selects
        # the default loaded model).
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        model_key = kwargs.pop("model", None)
        model = self.client.llm.model(model_key) if model_key else self.client.llm.model()

        formatted_input = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if response_format is not None:
            kwargs["response_format"] = response_format

        return model, formatted_input, kwargs

    def generate(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using LMStudio.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        str
            The text generated by the model.

        """
        model, formatted_input, kwargs = self._prepare_call(
            model_input, output_type, kwargs
        )
        result = model.respond(formatted_input, **kwargs)
        return result.content

    def generate_batch(
        self,
        model_input,
        output_type=None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `lmstudio` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using LMStudio.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        model, formatted_input, kwargs = self._prepare_call(
            model_input, output_type, kwargs
        )
        stream = model.respond_stream(formatted_input, **kwargs)
        for fragment in stream:
            yield fragment.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Client

A LMStudio Client instance obtained via lmstudio.Client() or lmstudio.get_default_client().

required
model_name Optional[str]

The name of the model to use. If not provided, uses the default loaded model in LMStudio.

None
Source code in outlines/models/lmstudio.py
def __init__(self, client: "Client", model_name: Optional[str] = None):
    """Initialize the LMStudio model wrapper.

    Parameters
    ----------
    client
        A LMStudio Client instance obtained via `lmstudio.Client()` or
        `lmstudio.get_default_client()`.
    model_name
        The name of the model to use. If not provided, uses the default
        loaded model in LMStudio.

    """
    # Assignments are independent of one another; order is arbitrary.
    self.type_adapter = LMStudioTypeAdapter()
    self.client = client
    self.model_name = model_name

generate(model_input, output_type=None, **kwargs)

Generate text using LMStudio.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/lmstudio.py
def generate(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using LMStudio.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    str
        The text generated by the model.

    """
    # Use the caller's `model` when given (even if explicitly None);
    # otherwise fall back to the instance default.
    if "model" in kwargs:
        model_key = kwargs.pop("model")
    else:
        model_key = self.model_name

    # A falsy key selects the default loaded model.
    if model_key:
        llm = self.client.llm.model(model_key)
    else:
        llm = self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)
    if response_format is not None:
        kwargs["response_format"] = response_format

    result = llm.respond(formatted_input, **kwargs)
    return result.content

generate_stream(model_input, output_type=None, **kwargs)

Stream text using LMStudio.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/lmstudio.py
def generate_stream(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> Iterator[str]:
    """Stream text using LMStudio.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    # Use the caller's `model` when given (even if explicitly None);
    # otherwise fall back to the instance default.
    if "model" in kwargs:
        model_key = kwargs.pop("model")
    else:
        model_key = self.model_name

    # A falsy key selects the default loaded model.
    if model_key:
        llm = self.client.llm.model(model_key)
    else:
        llm = self.client.llm.model()

    formatted_input = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)
    if response_format is not None:
        kwargs["response_format"] = response_format

    for fragment in llm.respond_stream(formatted_input, **kwargs):
        yield fragment.content

LlamaCpp

Bases: Model

Thin wrapper around the llama_cpp.Llama model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the llama_cpp.Llama model.

Source code in outlines/models/llamacpp.py
class LlamaCpp(Model):
    """Thin wrapper around the `llama_cpp.Llama` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `llama_cpp.Llama` model.
    """

    tensor_library_name = "numpy"

    def __init__(self, model: "Llama", chat_mode: bool = True):
        """
        Parameters
        ----------
        model
            A `llama_cpp.Llama` model instance.
        chat_mode
            Whether to enable chat mode. If `False`, the model will regard
            all `str` inputs as plain text prompts. If `True`, the model will
            regard all `str` inputs as user messages in a chat conversation.

        """
        self.model = model
        self.tokenizer = LlamaCppTokenizer(self.model)

        # llama-cpp-python falls back to a default chat template even when the
        # user has not configured one explicitly:
        # https://github.com/abetlen/llama-cpp-python/blob/c37132b/llama_cpp/llama.py#L540-L545
        # Chat mode therefore defaults to True, matching upstream's preference
        # for chat-style usage.
        self.type_adapter = LlamaCppTypeAdapter(has_chat_template=chat_mode)

    def generate(
        self,
        model_input: Union[Chat, str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        prompt = self.type_adapter.format_input(model_input)

        if isinstance(prompt, str):
            # Plain-text completion endpoint.
            response = self.model(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                **inference_kwargs,
            )
            text = response["choices"][0]["text"]
        elif isinstance(prompt, list):
            # Chat-style completion endpoint.
            response = self.model.create_chat_completion(
                prompt,
                logits_processor=self.type_adapter.format_output_type(output_type),
                **inference_kwargs,
            )
            text = response["choices"][0]["message"]["content"]
        else:  # Never reached  # pragma: no cover
            raise ValueError("Unexpected prompt type.")

        # Clear the model's internal state so subsequent calls start fresh.
        self.model.reset()

        return text

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Batch generation is not available for `llama-cpp-python`."""
        raise NotImplementedError("LlamaCpp does not support batch generation.")

    def generate_stream(
        self,
        model_input: Union[Chat, str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using `llama-cpp-python`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        **inference_kwargs
            Additional keyword arguments to pass to the `Llama.__call__`
            method of the `llama-cpp-python` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        prompt = self.type_adapter.format_input(model_input)
        processor = self.type_adapter.format_output_type(output_type)

        if isinstance(prompt, str):
            # Plain-text completion endpoint.
            for chunk in self.model(
                prompt,
                logits_processor=processor,
                stream=True,
                **inference_kwargs,
            ):
                yield chunk["choices"][0]["text"]
        elif isinstance(prompt, list):
            # Chat-style endpoint: a delta may omit the content key entirely.
            for chunk in self.model.create_chat_completion(
                prompt,
                logits_processor=processor,
                stream=True,
                **inference_kwargs,
            ):
                yield chunk["choices"][0]["delta"].get("content", "")
        else:  # Never reached  # pragma: no cover
            raise ValueError("Unexpected prompt type.")

__init__(model, chat_mode=True)

Parameters:

Name Type Description Default
model Llama

A llama_cpp.Llama model instance.

required
chat_mode bool

Whether to enable chat mode. If False, the model will regard all str inputs as plain text prompts. If True, the model will regard all str inputs as user messages in a chat conversation.

True
Source code in outlines/models/llamacpp.py
def __init__(self, model: "Llama", chat_mode: bool = True):
    """
    Parameters
    ----------
    model
        A `llama_cpp.Llama` model instance.
    chat_mode
        Whether to enable chat mode. If `False`, the model will regard
        all `str` inputs as plain text prompts. If `True`, the model will
        regard all `str` inputs as user messages in a chat conversation.

    """
    self.model = model
    self.tokenizer = LlamaCppTokenizer(self.model)

    # llama-cpp-python falls back to a default chat template even when the
    # user has not configured one explicitly:
    # https://github.com/abetlen/llama-cpp-python/blob/c37132b/llama_cpp/llama.py#L540-L545
    # Chat mode therefore defaults to True, matching upstream's preference
    # for chat-style usage.
    self.type_adapter = LlamaCppTypeAdapter(has_chat_template=chat_mode)

generate(model_input, output_type=None, **inference_kwargs)

Generate text using llama-cpp-python.

Parameters:

Name Type Description Default
model_input Union[Chat, str]

The prompt based on which the model will generate a response.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
**inference_kwargs Any

Additional keyword arguments to pass to the Llama.__call__ method of the llama-cpp-python library.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/llamacpp.py
def generate(
    self,
    model_input: Union[Chat, str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    prompt = self.type_adapter.format_input(model_input)

    if isinstance(prompt, str):
        # Plain-text completion endpoint.
        response = self.model(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        text = response["choices"][0]["text"]
    elif isinstance(prompt, list):
        # Chat-style completion endpoint.
        response = self.model.create_chat_completion(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        text = response["choices"][0]["message"]["content"]
    else:  # Never reached  # pragma: no cover
        raise ValueError("Unexpected prompt type.")

    # Clear the model's internal state so subsequent calls start fresh.
    self.model.reset()

    return text

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using llama-cpp-python.

Parameters:

Name Type Description Default
model_input Union[Chat, str]

The prompt based on which the model will generate a response.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
**inference_kwargs Any

Additional keyword arguments to pass to the Llama.__call__ method of the llama-cpp-python library.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/llamacpp.py
def generate_stream(
    self,
    model_input: Union[Chat, str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    prompt = self.type_adapter.format_input(model_input)
    processor = self.type_adapter.format_output_type(output_type)

    if isinstance(prompt, str):
        # Plain-text completion endpoint.
        for chunk in self.model(
            prompt,
            logits_processor=processor,
            stream=True,
            **inference_kwargs,
        ):
            yield chunk["choices"][0]["text"]
    elif isinstance(prompt, list):
        # Chat-style endpoint: a delta may omit the content key entirely.
        for chunk in self.model.create_chat_completion(
            prompt,
            logits_processor=processor,
            stream=True,
            **inference_kwargs,
        ):
            yield chunk["choices"][0]["delta"].get("content", "")
    else:  # Never reached  # pragma: no cover
        raise ValueError("Unexpected prompt type.")

MLXLM

Bases: Model

Thin wrapper around an mlx_lm model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the mlx_lm library.

Source code in outlines/models/mlxlm.py
class MLXLM(Model):
    """Thin wrapper around an `mlx_lm` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `mlx_lm` library.

    """

    tensor_library_name = "mlx"

    def __init__(
        self,
        model: "nn.Module",
        tokenizer: "PreTrainedTokenizer",
    ):
        """
        Parameters
        ----------
        model
            An instance of an `mlx_lm` model.
        tokenizer
            An instance of an `mlx_lm` tokenizer or of a compatible
            `transformers` tokenizer.

        """
        self.model = model
        # self.mlx_tokenizer is used by the mlx-lm in its generate function
        self.mlx_tokenizer = tokenizer
        # self.tokenizer is used by the logits processor
        self.tokenizer = TransformerTokenizer(tokenizer._tokenizer)
        self.type_adapter = MLXLMTypeAdapter(
            tokenizer=tokenizer,
            has_chat_template=_check_hf_chat_template(tokenizer)
        )

    def generate(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> str:
        """Generate text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        str
            The text generated by the model.

        """
        from mlx_lm import generate

        return generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )

    def generate_batch(
        self,
        model_input: list[str],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> list[str]:
        """Generate a batch of text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The list of prompts based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        list[str]
            The list of text generated by the model.

        Raises
        ------
        NotImplementedError
            If `output_type` is provided. mlx-lm cannot apply a logits
            processor in batch mode.

        """
        # Fail fast before importing mlx_lm so a missing dependency does not
        # mask the real error. The previous message concatenation was also
        # missing a space between sentences.
        if output_type:
            raise NotImplementedError(
                "mlx-lm does not support constrained generation with batching. "
                "You cannot provide an `output_type` with this method."
            )

        from mlx_lm import batch_generate

        prompts = [self.type_adapter.format_input(item) for item in model_input]

        # Contrarily to the other generate methods, batch_generate requires
        # tokenized prompts. Only add special tokens (e.g. BOS) when the
        # prompt does not already start with the BOS token.
        tokenized_model_input = [
            self.mlx_tokenizer.encode(
                prompt,
                add_special_tokens=(
                    self.mlx_tokenizer.bos_token is None
                    or not prompt.startswith(self.mlx_tokenizer.bos_token)
                ),
            )
            for prompt in prompts
        ]

        response = batch_generate(
            self.model,
            self.mlx_tokenizer,
            tokenized_model_input,
            **kwargs,
        )

        return response.texts

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **kwargs,
    ) -> Iterator[str]:
        """Stream text using `mlx-lm`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        kwargs
            Additional keyword arguments to pass to the `mlx-lm` library.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        from mlx_lm import stream_generate

        for gen_response in stream_generate(
            self.model,
            self.mlx_tokenizer,
            self.type_adapter.format_input(model_input),
            logits_processors=self.type_adapter.format_output_type(output_type),
            **kwargs,
        ):
            yield gen_response.text

__init__(model, tokenizer)

Parameters:

Name Type Description Default
model Module

An instance of an mlx_lm model.

required
tokenizer PreTrainedTokenizer

An instance of an mlx_lm tokenizer or of a compatible transformers tokenizer.

required
Source code in outlines/models/mlxlm.py
def __init__(
    self,
    model: "nn.Module",
    tokenizer: "PreTrainedTokenizer",
):
    """
    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        `transformers` tokenizer.

    """
    self.model = model
    # mlx-lm's generate functions consume the tokenizer directly.
    self.mlx_tokenizer = tokenizer
    # The logits processor works with the wrapped HF tokenizer instead.
    self.tokenizer = TransformerTokenizer(tokenizer._tokenizer)
    self.type_adapter = MLXLMTypeAdapter(
        tokenizer=tokenizer,
        has_chat_template=_check_hf_chat_template(tokenizer),
    )

generate(model_input, output_type=None, **kwargs)

Generate text using mlx-lm.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
kwargs

Additional keyword arguments to pass to the mlx-lm library.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/mlxlm.py
def generate(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> str:
    """Generate text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    str
        The text generated by the model.

    """
    from mlx_lm import generate

    prompt = self.type_adapter.format_input(model_input)
    processors = self.type_adapter.format_output_type(output_type)

    return generate(
        self.model,
        self.mlx_tokenizer,
        prompt,
        logits_processors=processors,
        **kwargs,
    )

generate_batch(model_input, output_type=None, **kwargs)

Generate a batch of text using mlx-lm.

Parameters:

Name Type Description Default
model_input list[str]

The list of prompts based on which the model will generate a response.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
kwargs

Additional keyword arguments to pass to the mlx-lm library.

{}

Returns:

Type Description
list[str]

The list of text generated by the model.

Source code in outlines/models/mlxlm.py
def generate_batch(
    self,
    model_input: list[str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> list[str]:
    """Generate a batch of text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The list of prompts based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    list[str]
        The list of text generated by the model.

    Raises
    ------
    NotImplementedError
        If `output_type` is provided. mlx-lm cannot apply a logits
        processor in batch mode.

    """
    # Fail fast before importing mlx_lm so a missing dependency does not
    # mask the real error. The previous message concatenation was also
    # missing a space between sentences.
    if output_type:
        raise NotImplementedError(
            "mlx-lm does not support constrained generation with batching. "
            "You cannot provide an `output_type` with this method."
        )

    from mlx_lm import batch_generate

    prompts = [self.type_adapter.format_input(item) for item in model_input]

    # Contrarily to the other generate methods, batch_generate requires
    # tokenized prompts. Only add special tokens (e.g. BOS) when the
    # prompt does not already start with the BOS token.
    tokenized_model_input = [
        self.mlx_tokenizer.encode(
            prompt,
            add_special_tokens=(
                self.mlx_tokenizer.bos_token is None
                or not prompt.startswith(self.mlx_tokenizer.bos_token)
            ),
        )
        for prompt in prompts
    ]

    response = batch_generate(
        self.model,
        self.mlx_tokenizer,
        tokenized_model_input,
        **kwargs,
    )

    return response.texts

generate_stream(model_input, output_type=None, **kwargs)

Stream text using mlx-lm.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
kwargs

Additional keyword arguments to pass to the mlx-lm library.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/mlxlm.py
def generate_stream(
    self,
    model_input: str,
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **kwargs,
) -> Iterator[str]:
    """Stream text using `mlx-lm`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    kwargs
        Additional keyword arguments to pass to the `mlx-lm` library.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    from mlx_lm import stream_generate

    prompt = self.type_adapter.format_input(model_input)
    processors = self.type_adapter.format_output_type(output_type)

    for chunk in stream_generate(
        self.model,
        self.mlx_tokenizer,
        prompt,
        logits_processors=processors,
        **kwargs,
    ):
        yield chunk.text

Mistral

Bases: Model

Thin wrapper around the mistralai.Mistral client.

Converts input and output types to arguments for the mistralai.Mistral client's chat.complete or chat.stream methods.

Source code in outlines/models/mistral.py
class Mistral(Model):
    """Thin wrapper around the `mistralai.Mistral` client.

    Converts input and output types to arguments for the `mistralai.Mistral`
    client's `chat.complete` or `chat.stream` methods.

    """

    def __init__(
        self, client: "MistralClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client : MistralClient
            A mistralai.Mistral client instance.
        model_name : Optional[str]
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = MistralTypeAdapter()

    @staticmethod
    def _translate_error(e):
        """Re-raise a client exception as a `TypeError` (schema-related) or a
        `RuntimeError`, always chaining the original exception as the cause.

        Previously the `TypeError` branch dropped the cause (`from e` was
        missing), losing the original traceback; both branches now chain it.
        This helper also removes the duplication between `generate` and
        `generate_stream`.
        """
        if "schema" in str(e).lower() or "json_schema" in str(e).lower():
            raise TypeError(
                f"Mistral does not support your schema: {e}. "
                "Try a local model or dottxt instead."
            ) from e
        raise RuntimeError(f"Mistral API error: {e}") from e

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate a response from the model.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs : Any
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The response generated by the model as text.

        Raises
        ------
        TypeError
            If the API rejects the provided schema.
        RuntimeError
            For any other API failure.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        # Fall back to the instance-level model name when the caller did not
        # specify one.
        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            result = self.client.chat.complete(
                messages=messages,
                response_format=response_format,
                **inference_kwargs,
            )
        except Exception as e:
            self._translate_error(e)

        outputs = [choice.message for choice in result.choices]

        # A single choice is unwrapped for convenience; multiple choices are
        # returned as a list of their contents.
        if len(outputs) == 1:
            return outputs[0].content
        else:
            return [m.content for m in outputs]

    def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        """Batch inference is not available in the `mistralai` library."""
        raise NotImplementedError(
            "The `mistralai` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Generate a stream of responses from the model.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text chunks generated by the model.

        Raises
        ------
        TypeError
            If the API rejects the provided schema.
        RuntimeError
            For any other API failure.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            stream = self.client.chat.stream(
                messages=messages,
                response_format=response_format,
                **inference_kwargs
            )
        except Exception as e:
            self._translate_error(e)

        # Skip events without data/choices and deltas with no content.
        for chunk in stream:
            if (
                hasattr(chunk, "data")
                and chunk.data.choices
                and chunk.data.choices[0].delta.content is not None
            ):
                yield chunk.data.choices[0].delta.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Mistral

A mistralai.Mistral client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/mistral.py
def __init__(
    self, client: "MistralClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client : MistralClient
        A mistralai.Mistral client instance.
    model_name : Optional[str]
        The name of the model to use when the caller does not pick one.

    """
    self.type_adapter = MistralTypeAdapter()
    self.client = client
    self.model_name = model_name

generate(model_input, output_type=None, **inference_kwargs)

Generate a response from the model.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt or chat messages to generate a response from.

required
output_type Optional[Any]

The desired format of the response (e.g., JSON schema).

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The response generated by the model as text.

Source code in outlines/models/mistral.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate a response from the model.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs : Any
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The response generated by the model as text.

    Raises
    ------
    TypeError
        If the API rejects the provided schema.
    RuntimeError
        For any other API failure.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    # Fall back to the instance-level model name when the caller did not
    # specify one.
    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        result = self.client.chat.complete(
            messages=messages,
            response_format=response_format,
            **inference_kwargs,
        )
    except Exception as e:
        if "schema" in str(e).lower() or "json_schema" in str(e).lower():
            # Chain the cause (`from e`) so the original traceback is kept,
            # matching the RuntimeError branch below.
            raise TypeError(
                f"Mistral does not support your schema: {e}. "
                "Try a local model or dottxt instead."
            ) from e
        else:
            raise RuntimeError(f"Mistral API error: {e}") from e

    outputs = [choice.message for choice in result.choices]

    # A single choice is unwrapped for convenience; multiple choices are
    # returned as a list of their contents.
    if len(outputs) == 1:
        return outputs[0].content
    else:
        return [m.content for m in outputs]

generate_stream(model_input, output_type=None, **inference_kwargs)

Generate a stream of responses from the model.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt or chat messages to generate a response from.

required
output_type Optional[Any]

The desired format of the response (e.g., JSON schema).

None
**inference_kwargs

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text chunks generated by the model.

Source code in outlines/models/mistral.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Generate a stream of responses from the model.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text chunks generated by the model.

    Raises
    ------
    TypeError
        If the API rejects the provided schema.
    RuntimeError
        For any other API failure.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        stream = self.client.chat.stream(
            messages=messages,
            response_format=response_format,
            **inference_kwargs
        )
    except Exception as e:
        if "schema" in str(e).lower() or "json_schema" in str(e).lower():
            # Chain the cause (`from e`) so the original traceback is kept,
            # matching the RuntimeError branch below.
            raise TypeError(
                f"Mistral does not support your schema: {e}. "
                "Try a local model or dottxt instead."
            ) from e
        else:
            raise RuntimeError(f"Mistral API error: {e}") from e

    # Skip events without data/choices and deltas with no content.
    for chunk in stream:
        if (
            hasattr(chunk, "data")
            and chunk.data.choices
            and chunk.data.choices[0].delta.content is not None
        ):
            yield chunk.data.choices[0].delta.content

Model

Bases: ABC

Base class for all synchronous models.

This class defines shared __call__, batch and stream methods that can be used to call the model directly. The generate, generate_batch, and generate_stream methods must be implemented by the subclasses. All models inheriting from this class must define a type_adapter attribute of type ModelTypeAdapter. The methods of the type_adapter attribute are used in the generate, generate_batch, and generate_stream methods to format the input and output types received by the model. Additionally, steerable models must define a tensor_library_name attribute.

Source code in outlines/models/base.py
class Model(ABC):
    """Abstract parent class of all synchronous models.

    Concrete subclasses must implement `generate`, `generate_batch` and
    `generate_stream`, and must expose a `type_adapter` attribute (a
    `ModelTypeAdapter` instance) whose methods translate the input and
    output types received from the user into the format the underlying
    model expects. Steerable models must additionally declare a
    `tensor_library_name` attribute.
    The concrete `__call__`, `batch` and `stream` methods defined below
    let users invoke the model directly: each one builds a `Generator`
    and delegates the actual work to it.

    """
    # Translates user-level input/output types for the underlying model.
    type_adapter: ModelTypeAdapter
    # Name of the tensor library; only steerable models must define it.
    tensor_library_name: str

    def __call__(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Call the model directly.

        Calling the model is shorthand for building a generator with the
        provided output type and invoking it, so these are equivalent:
        ```python
        Generator(model, Foo)("prompt")
        model("prompt", Foo)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        backend
            Name of the backend used to create the logits processor for
            the response. Only relevant for steerable models when an
            `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        # NOTE(review): local import — presumably avoids a circular
        # dependency with outlines.generator; confirm before moving it.
        from outlines.generator import Generator

        generator = Generator(self, output_type, backend)
        return generator(model_input, **inference_kwargs)

    def batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Run the model on several inputs at once.

        Using `batch` on the model is shorthand for building a generator
        and invoking its `batch` method, so these are equivalent:
        ```python
        Generator(model, Foo).batch(["prompt1", "prompt2"])
        model.batch(["prompt1", "prompt2"], Foo)
        ```

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        backend
            Name of the backend used to create the logits processor for
            the response. Only relevant for steerable models when an
            `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        List[Any]
            One response per input.

        """
        from outlines import Generator

        return Generator(self, output_type, backend).batch(
            model_input, **inference_kwargs
        )  # type: ignore

    def stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        backend: Optional[str] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Stream a response from the model.

        Using `stream` on the model is shorthand for building a generator
        and invoking its `stream` method, so these are equivalent:
        ```python
        for chunk in Generator(model, Foo)("prompt"):
            print(chunk)
        for chunk in model.stream("prompt", Foo):
            print(chunk)
        ```

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        backend
            Name of the backend used to create the logits processor for
            the response. Only relevant for steerable models when an
            `output_type` is provided.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        from outlines import Generator

        return Generator(self, output_type, backend).stream(
            model_input, **inference_kwargs
        )  # type: ignore

    @abstractmethod
    def generate(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Any:
        """Produce a single response from the model.

        For steerable models `output_type` carries a logits processor;
        for black-box models it carries a type (Json, Enum...). This
        method is not meant to be called directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        ...

    @abstractmethod
    def generate_batch(
        self,
        model_input: List[Any],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> List[Any]:
        """Produce a batch of responses from the model.

        For steerable models `output_type` carries a logits processor;
        for black-box models it carries a type (Json, Enum...). This
        method is not meant to be called directly by end users.

        Parameters
        ----------
        model_input
            The list of inputs provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        ...

    @abstractmethod
    def generate_stream(
        self,
        model_input: Any,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any
    ) -> Iterator[Any]:
        """Produce a stream of responses from the model.

        For steerable models `output_type` carries a logits processor;
        for black-box models it carries a type (Json, Enum...). This
        method is not meant to be called directly by end users.

        Parameters
        ----------
        model_input
            The input provided by the user.
        output_type
            The output type provided by the user.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        Iterator[Any]
            A stream of responses from the model.

        """
        ...

__call__(model_input, output_type=None, backend=None, **inference_kwargs)

Call the model.

Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent:

generator = Generator(model, Foo)
generator("prompt")
and
model("prompt", Foo)

Parameters:

Name Type Description Default
model_input Any

The input provided by the user.

required
output_type Optional[Any]

The output type provided by the user.

None
backend Optional[str]

The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if output_type is provided.

None
**inference_kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Any

The response generated by the model.

Source code in outlines/models/base.py
def __call__(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> Any:
    """Invoke the model directly.

    Calling the model is shorthand for building a generator with the
    provided output type and invoking it, so these are equivalent:
    ```python
    Generator(model, Foo)("prompt")
    model("prompt", Foo)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    backend
        Name of the backend used to create the logits processor for the
        response. Only relevant for steerable models when an
        `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    from outlines.generator import Generator

    generator = Generator(self, output_type, backend)
    return generator(model_input, **inference_kwargs)

batch(model_input, output_type=None, backend=None, **inference_kwargs)

Make a batch call to the model (several inputs at once).

Users can use the batch method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its batch method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
generator.batch(["prompt1", "prompt2"])
and
model.batch(["prompt1", "prompt2"], Foo)

Parameters:

Name Type Description Default
model_input List[Any]

The list of inputs provided by the user.

required
output_type Optional[Any]

The output type provided by the user.

None
backend Optional[str]

The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if output_type is provided.

None
**inference_kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
List[Any]

The list of responses generated by the model.

Source code in outlines/models/base.py
def batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Run the model on several inputs at once.

    Using `batch` on the model is shorthand for building a generator
    and invoking its `batch` method, so these are equivalent:
    ```python
    Generator(model, Foo).batch(["prompt1", "prompt2"])
    model.batch(["prompt1", "prompt2"], Foo)
    ```

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    backend
        Name of the backend used to create the logits processor for the
        response. Only relevant for steerable models when an
        `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    List[Any]
        One response per input.

    """
    from outlines import Generator

    return Generator(self, output_type, backend).batch(
        model_input, **inference_kwargs
    )  # type: ignore

generate(model_input, output_type=None, **inference_kwargs) abstractmethod

Generate a response from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name Type Description Default
model_input Any

The input provided by the user.

required
output_type Optional[Any]

The output type provided by the user.

None
**inference_kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Any

The response generated by the model.

Source code in outlines/models/base.py
@abstractmethod
def generate(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Any:
    """Produce a single response from the model.

    For steerable models `output_type` carries a logits processor; for
    black-box models it carries a type (Json, Enum...). This method is
    not meant to be called directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    ...

generate_batch(model_input, output_type=None, **inference_kwargs) abstractmethod

Generate a batch of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name Type Description Default
model_input List[Any]

The list of inputs provided by the user.

required
output_type Optional[Any]

The output type provided by the user.

None
**inference_kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
List[Any]

The list of responses generated by the model.

Source code in outlines/models/base.py
@abstractmethod
def generate_batch(
    self,
    model_input: List[Any],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> List[Any]:
    """Produce a batch of responses from the model.

    For steerable models `output_type` carries a logits processor; for
    black-box models it carries a type (Json, Enum...). This method is
    not meant to be called directly by end users.

    Parameters
    ----------
    model_input
        The list of inputs provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    ...

generate_stream(model_input, output_type=None, **inference_kwargs) abstractmethod

Generate a stream of responses from the model.

The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users.

Parameters:

Name Type Description Default
model_input Any

The input provided by the user.

required
output_type Optional[Any]

The output type provided by the user.

None
**inference_kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Iterator[Any]

A stream of responses from the model.

Source code in outlines/models/base.py
@abstractmethod
def generate_stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Produce a stream of responses from the model.

    For steerable models `output_type` carries a logits processor; for
    black-box models it carries a type (Json, Enum...). This method is
    not meant to be called directly by end users.

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    ...

stream(model_input, output_type=None, backend=None, **inference_kwargs)

Stream a response from the model.

Users can use the stream method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its stream method. Thus, those commands are equivalent:

generator = Generator(model, Foo)
for chunk in generator("prompt"):
    print(chunk)
and
for chunk in model.stream("prompt", Foo):
    print(chunk)

Parameters:

Name Type Description Default
model_input Any

The input provided by the user.

required
output_type Optional[Any]

The output type provided by the user.

None
backend Optional[str]

The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if output_type is provided.

None
**inference_kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Iterator[Any]

A stream of responses from the model.

Source code in outlines/models/base.py
def stream(
    self,
    model_input: Any,
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    **inference_kwargs: Any
) -> Iterator[Any]:
    """Stream a response from the model.

    Using `stream` on the model is shorthand for building a generator
    and invoking its `stream` method, so these are equivalent:
    ```python
    for chunk in Generator(model, Foo)("prompt"):
        print(chunk)
    for chunk in model.stream("prompt", Foo):
        print(chunk)
    ```

    Parameters
    ----------
    model_input
        The input provided by the user.
    output_type
        The output type provided by the user.
    backend
        Name of the backend used to create the logits processor for the
        response. Only relevant for steerable models when an
        `output_type` is provided.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    Iterator[Any]
        A stream of responses from the model.

    """
    from outlines import Generator

    return Generator(self, output_type, backend).stream(
        model_input, **inference_kwargs
    )  # type: ignore

ModelTypeAdapter

Bases: ABC

Base class for all model type adapters.

A type adapter instance must be given as a value to the type_adapter attribute when instantiating a model. The type adapter is responsible for formatting the input and output types passed to the model to match the specific format expected by the associated model.

Source code in outlines/models/base.py
class ModelTypeAdapter(ABC):
    """Abstract parent class of all model type adapters.

    Every model carries a type adapter instance as its `type_adapter`
    attribute. The adapter is responsible for reshaping the input and
    the output type supplied by the user into the exact format the
    associated model expects.

    """

    @abstractmethod
    def format_input(self, model_input: Any) -> Any:
        """Turn the user input into the model's expected input format.

        For API-based models this usually means building the `messages`
        argument handed to the client; for local models it can be as
        simple as casting a str to a list. This method also validates
        that the input type provided by the user is supported by the
        model.

        Parameters
        ----------
        model_input
            The input provided by the user.

        Returns
        -------
        Any
            The formatted input to pass to the model.

        """
        ...

    @abstractmethod
    def format_output_type(self, output_type: Optional[Any] = None) -> Any:
        """Turn the output type into the model's expected format.

        For black-box models this usually means building a
        `response_format` argument; for steerable models it means
        formatting the logits processor into the object type the model
        expects.

        Parameters
        ----------
        output_type
            The output type provided by the user.

        Returns
        -------
        Any
            The formatted output type to pass to the model.

        """
        ...

format_input(model_input) abstractmethod

Format the user input to the expected format of the model.

For API-based models, it typically means creating the messages argument passed to the client. For local models, it can mean casting the input from str to list for instance. This method is also used to validate that the input type provided by the user is supported by the model.

Parameters:

Name Type Description Default
model_input Any

The input provided by the user.

required

Returns:

Type Description
Any

The formatted input to be passed to the model.

Source code in outlines/models/base.py
@abstractmethod
def format_input(self, model_input: Any) -> Any:
    """Turn the user input into the model's expected input format.

    For API-based models this usually means building the `messages`
    argument handed to the client; for local models it can be as simple
    as casting a str to a list. This method also validates that the
    input type provided by the user is supported by the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    Any
        The formatted input to pass to the model.

    """
    ...

format_output_type(output_type=None) abstractmethod

Format the output type to the expected format of the model.

For black-box models, this typically means creating a response_format argument. For steerable models, it means formatting the logits processor to create the object type expected by the model.

Parameters:

Name Type Description Default
output_type Optional[Any]

The output type provided by the user.

None

Returns:

Type Description
Any

The formatted output type to be passed to the model.

Source code in outlines/models/base.py
@abstractmethod
def format_output_type(self, output_type: Optional[Any] = None) -> Any:
    """Turn the output type into the model's expected format.

    For black-box models this usually means building a `response_format`
    argument; for steerable models it means formatting the logits
    processor into the object type the model expects.

    Parameters
    ----------
    output_type
        The output type provided by the user.

    Returns
    -------
    Any
        The formatted output type to pass to the model.

    """
    ...

Ollama

Bases: Model

Thin wrapper around the ollama.Client client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.Client client.

Source code in outlines/models/ollama.py
class Ollama(Model):
    """Thin wrapper around the `ollama.Client` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `ollama.Client` client.

    """

    def __init__(self, client: "Client", model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            The `ollama.Client` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OllamaTypeAdapter()

    def generate(self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        # Fall back to the model name set at construction time unless the
        # caller explicitly provided one.
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        # Fixed: removed a leftover debug `print(...)` that also formatted
        # the input a second time on every call.
        response = self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            **kwargs,
        )
        return response.message.content

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **kwargs,
    ):
        """Unsupported operation.

        Raises
        ------
        NotImplementedError
            Always, since the `ollama` client has no batch endpoint.

        """
        raise NotImplementedError(
            "The `ollama` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        # Fall back to the model name set at construction time unless the
        # caller explicitly provided one.
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        response = self.client.chat(
            messages=self.type_adapter.format_input(model_input),
            format=self.type_adapter.format_output_type(output_type),
            stream=True,
            **kwargs,
        )
        for chunk in response:
            yield chunk.message.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Client

The ollama.Client client.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/ollama.py
def __init__(self, client: "Client", model_name: Optional[str] = None):
    """Build the wrapper.

    Parameters
    ----------
    client
        The `ollama.Client` client.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = OllamaTypeAdapter()

generate(model_input, output_type=None, **kwargs)

Generate text using Ollama.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/ollama.py
def generate(self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    # Fall back to the model name set at construction time unless the
    # caller explicitly provided one.
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    # Fixed: removed a leftover debug `print(...)` that also formatted
    # the input a second time on every call.
    response = self.client.chat(
        messages=self.type_adapter.format_input(model_input),
        format=self.type_adapter.format_output_type(output_type),
        **kwargs,
    )
    return response.message.content

generate_stream(model_input, output_type=None, **kwargs)

Stream text using Ollama.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py
def generate_stream(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> Iterator[str]:
    """Stream text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    # Default to the model name chosen at construction time.
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    stream = self.client.chat(
        messages=self.type_adapter.format_input(model_input),
        format=self.type_adapter.format_output_type(output_type),
        stream=True,
        **kwargs,
    )
    for part in stream:
        yield part.message.content

OpenAI

Bases: Model

Thin wrapper around the openai.OpenAI client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client.

Source code in outlines/models/openai.py
class OpenAI(Model):
    """Thin wrapper around the `openai.OpenAI` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client.

    """

    def __init__(
        self,
        client: Union["OpenAIClient", "AzureOpenAIClient"],
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `openai.OpenAI` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OpenAITypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        Raises
        ------
        TypeError
            If OpenAI rejects the provided schema.
        ValueError
            If the model refuses to answer the request.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            result = self.client.chat.completions.create(
                messages=messages,
                **response_format,
                **inference_kwargs,
            )
        except openai.BadRequestError as e:
            # `e.body` is not guaranteed to be a dict containing a "message"
            # key; access it defensively so a malformed error body does not
            # mask the original exception with a TypeError/KeyError.
            body = getattr(e, "body", None)
            error_message = (
                body.get("message", "") if isinstance(body, dict) else ""
            )
            if error_message.startswith("Invalid schema"):
                raise TypeError(
                    f"OpenAI does not support your schema: {error_message}. "
                    "Try a local model or dottxt instead."
                )
            raise

        messages = [choice.message for choice in result.choices]
        for message in messages:
            if message.refusal is not None:
                raise ValueError(
                    f"OpenAI refused to answer the request: {message.refusal}"
                )

        # A single choice is returned as a plain string; multiple choices
        # (e.g. `n > 1`) are returned as a list.
        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Not supported: the `openai` library has no batch inference API."""
        raise NotImplementedError(
            "The `openai` library does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs,
    ) -> Iterator[str]:
        """Stream text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            stream = self.client.chat.completions.create(
                stream=True,
                messages=messages,
                **response_format,
                **inference_kwargs
            )
        except openai.BadRequestError as e:
            # Same defensive access as in `generate`: the error body may be
            # None or a non-dict, in which case we simply re-raise.
            body = getattr(e, "body", None)
            error_message = (
                body.get("message", "") if isinstance(body, dict) else ""
            )
            if error_message.startswith("Invalid schema"):
                raise TypeError(
                    f"OpenAI does not support your schema: {error_message}. "
                    "Try a local model or dottxt instead."
                )
            raise

        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Union[OpenAI, AzureOpenAI]

The openai.OpenAI client.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/openai.py
def __init__(
    self,
    client: Union["OpenAIClient", "AzureOpenAIClient"],
    model_name: Optional[str] = None,
):
    """Store the client and model name, and build the type adapter used to
    translate Outlines inputs and output types into client arguments.

    Parameters
    ----------
    client
        The `openai.OpenAI` client.
    model_name
        The name of the model to use.

    """
    self.client, self.model_name = client, model_name
    self.type_adapter = OpenAITypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using OpenAI.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Union[type[BaseModel], str]]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/openai.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    Raises
    ------
    TypeError
        If OpenAI rejects the provided schema.
    ValueError
        If the model refuses to answer the request.

    """
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        result = self.client.chat.completions.create(
            messages=messages,
            **response_format,
            **inference_kwargs,
        )
    except openai.BadRequestError as e:
        # `e.body` is not guaranteed to be a dict containing a "message"
        # key; access it defensively so a malformed error body does not
        # mask the original exception with a TypeError/KeyError.
        body = getattr(e, "body", None)
        error_message = (
            body.get("message", "") if isinstance(body, dict) else ""
        )
        if error_message.startswith("Invalid schema"):
            raise TypeError(
                f"OpenAI does not support your schema: {error_message}. "
                "Try a local model or dottxt instead."
            )
        raise

    messages = [choice.message for choice in result.choices]
    for message in messages:
        if message.refusal is not None:
            raise ValueError(
                f"OpenAI refused to answer the request: {message.refusal}"
            )

    # A single choice is returned as a plain string; multiple choices
    # (e.g. `n > 1`) are returned as a list.
    if len(messages) == 1:
        return messages[0].content
    else:
        return [message.content for message in messages]

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using OpenAI.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Union[type[BaseModel], str]]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.

None
**inference_kwargs

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/openai.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs,
) -> Iterator[str]:
    """Stream text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    Raises
    ------
    TypeError
        If OpenAI rejects the provided schema.

    """
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        stream = self.client.chat.completions.create(
            stream=True,
            messages=messages,
            **response_format,
            **inference_kwargs
        )
    except openai.BadRequestError as e:
        # `e.body` is not guaranteed to be a dict containing a "message"
        # key; access it defensively so a malformed error body does not
        # mask the original exception with a TypeError/KeyError.
        body = getattr(e, "body", None)
        error_message = (
            body.get("message", "") if isinstance(body, dict) else ""
        )
        if error_message.startswith("Invalid schema"):
            raise TypeError(
                f"OpenAI does not support your schema: {error_message}. "
                "Try a local model or dottxt instead."
            )
        raise

    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

SGLang

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py
class SGLang(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """Store the client and model name, and build the type adapter used
        to translate Outlines inputs and output types into client arguments.

        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using SGLang.

        Parameters
        ----------
        model_input
            The prompt from which the model generates its response.
        output_type
            The desired response format. Every Outlines output type is
            supported, provided the server uses a structured generation
            backend that supports it.
        inference_kwargs
            Extra keyword arguments forwarded to the client.

        Returns
        -------
        Union[str, list[str]]
            The generated text.

        """
        request_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )
        completion = self.client.chat.completions.create(**request_args)

        choice_messages = [choice.message for choice in completion.choices]
        for choice_message in choice_messages:
            if choice_message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The SGLang server refused to answer the request: "
                    f"{choice_message.refusal}"
                )

        # One choice comes back as a plain string, several as a list.
        if len(choice_messages) == 1:
            return choice_messages[0].content
        return [choice_message.content for choice_message in choice_messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Not supported for SGLang."""
        raise NotImplementedError(
            "SGLang does not support batch inference."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using SGLang.

        Parameters
        ----------
        model_input
            The prompt from which the model generates its response.
        output_type
            The desired response format. Every Outlines output type is
            supported, provided the server uses a structured generation
            backend that supports it.
        inference_kwargs
            Extra keyword arguments forwarded to the client.

        Returns
        -------
        Iterator[str]
            An iterator over the generated text chunks.

        """
        request_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )
        chunk_stream = self.client.chat.completions.create(
            stream=True, **request_args,
        )

        for chunk in chunk_stream:  # pragma: no cover
            delta_text = (
                chunk.choices[0].delta.content if chunk.choices else None
            )
            if delta_text is not None:
                yield delta_text

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Assemble the keyword arguments for the SGLang client call."""
        messages = self.type_adapter.format_input(model_input)
        inference_kwargs.update(
            self.type_adapter.format_output_type(output_type)
        )

        # An explicit `model` kwarg from the caller takes precedence over
        # the wrapper's configured model name.
        if self.model_name is not None:
            inference_kwargs.setdefault("model", self.model_name)

        return {"messages": messages, **inference_kwargs}

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client

An openai.OpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/sglang.py
def __init__(self, client, model_name: Optional[str] = None):
    """Store the client and model name, and build the type adapter used to
    translate Outlines inputs and output types into client arguments.

    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client, self.model_name = client, model_name
    self.type_adapter = SGLangTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using SGLang.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/sglang.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using SGLang.

    Parameters
    ----------
    model_input
        The prompt from which the model generates its response.
    output_type
        The desired response format. Every Outlines output type is
        supported, provided the server uses a structured generation
        backend that supports it.
    inference_kwargs
        Extra keyword arguments forwarded to the client.

    Returns
    -------
    Union[str, list[str]]
        The generated text.

    """
    request_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )
    completion = self.client.chat.completions.create(**request_args)

    choice_messages = [choice.message for choice in completion.choices]
    for choice_message in choice_messages:
        if choice_message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The SGLang server refused to answer the request: "
                f"{choice_message.refusal}"
            )

    # One choice comes back as a plain string, several as a list.
    if len(choice_messages) == 1:
        return choice_messages[0].content
    return [choice_message.content for choice_message in choice_messages]

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using SGLang.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using SGLang.

    Parameters
    ----------
    model_input
        The prompt from which the model generates its response.
    output_type
        The desired response format. Every Outlines output type is
        supported, provided the server uses a structured generation
        backend that supports it.
    inference_kwargs
        Extra keyword arguments forwarded to the client.

    Returns
    -------
    Iterator[str]
        An iterator over the generated text chunks.

    """
    request_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )
    chunk_stream = self.client.chat.completions.create(
        stream=True, **request_args,
    )

    for chunk in chunk_stream:  # pragma: no cover
        delta_text = chunk.choices[0].delta.content if chunk.choices else None
        if delta_text is not None:
            yield delta_text

TGI

Bases: Model

Thin wrapper around a huggingface_hub.InferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.InferenceClient client.

Source code in outlines/models/tgi.py
class TGI(Model):
    """Thin wrapper around a `huggingface_hub.InferenceClient` client used to
    communicate with a `TGI` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the
    `huggingface_hub.InferenceClient` client.

    """

    def __init__(self, client):
        """Store the client and build the type adapter used to translate
        Outlines inputs and output types into client arguments.

        Parameters
        ----------
        client
            A huggingface `InferenceClient` client instance.

        """
        self.client = client
        self.type_adapter = TGITypeAdapter()

    def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt from which the model generates its response.
        output_type
            The desired response format. Every output type except `CFG` is
            supported, provided the server uses a backend that supports it.
        inference_kwargs
            Extra keyword arguments forwarded to the client.

        Returns
        -------
        str
            The generated text.

        """
        request_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )
        return self.client.text_generation(**request_args)

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Not supported for TGI."""
        raise NotImplementedError("TGI does not support batch inference.")

    def generate_stream(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt from which the model generates its response.
        output_type
            The desired response format. Every output type except `CFG` is
            supported, provided the server uses a backend that supports it.
        inference_kwargs
            Extra keyword arguments forwarded to the client.

        Returns
        -------
        Iterator[str]
            An iterator over the generated text chunks.

        """
        request_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )
        token_stream = self.client.text_generation(
            stream=True, **request_args,
        )
        yield from token_stream  # pragma: no cover

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Assemble the keyword arguments for the TGI client call."""
        prompt = self.type_adapter.format_input(model_input)
        inference_kwargs.update(
            self.type_adapter.format_output_type(output_type)
        )
        return {"prompt": prompt, **inference_kwargs}

__init__(client)

Parameters:

Name Type Description Default
client

A huggingface InferenceClient client instance.

required
Source code in outlines/models/tgi.py
def __init__(self, client):
    """Store the client and build the type adapter used to translate
    Outlines inputs and output types into client arguments.

    Parameters
    ----------
    client
        A huggingface `InferenceClient` client instance.

    """
    self.client = client
    self.type_adapter = TGITypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/tgi.py
def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt from which the model generates its response.
    output_type
        The desired response format. Every output type except `CFG` is
        supported, provided the server uses a backend that supports it.
    inference_kwargs
        Extra keyword arguments forwarded to the client.

    Returns
    -------
    str
        The generated text.

    """
    request_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )
    return self.client.text_generation(**request_args)

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py
def generate_stream(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt from which the model generates its response.
    output_type
        The desired response format. Every output type except `CFG` is
        supported, provided the server uses a backend that supports it.
    inference_kwargs
        Extra keyword arguments forwarded to the client.

    Returns
    -------
    Iterator[str]
        An iterator over the generated text chunks.

    """
    request_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )
    token_stream = self.client.text_generation(
        stream=True, **request_args,
    )
    yield from token_stream  # pragma: no cover

TransformerTokenizer

Bases: Tokenizer

Represents a tokenizer for models in the transformers library.

Source code in outlines/models/transformers.py
class TransformerTokenizer(Tokenizer):
    """Represents a tokenizer for models in the `transformers` library."""

    def __init__(self, tokenizer: "PreTrainedTokenizer", **kwargs):
        self.tokenizer = tokenizer
        self.eos_token_id = self.tokenizer.eos_token_id
        self.eos_token = self.tokenizer.eos_token
        self.get_vocab = self.tokenizer.get_vocab

        if self.tokenizer.pad_token_id is None:
            # Fall back to the EOS token for padding when the tokenizer does
            # not define a pad token.
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
            self.pad_token_id = self.eos_token_id
            # Bug fix: `pad_token` was previously only assigned on the other
            # branch, leaving the attribute undefined here and causing a
            # latent AttributeError for tokenizers without a pad token.
            self.pad_token = self.eos_token
        else:
            self.pad_token_id = self.tokenizer.pad_token_id
            self.pad_token = self.tokenizer.pad_token

        self.special_tokens = set(self.tokenizer.all_special_tokens)

        self.vocabulary = self.tokenizer.get_vocab()
        self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types())

    def encode(
        self, prompt: Union[str, List[str]], **kwargs
    ) -> Tuple["torch.LongTensor", "torch.LongTensor"]:
        """Tokenize `prompt`, returning padded input ids and attention mask."""
        kwargs["padding"] = True
        kwargs["return_tensors"] = "pt"
        output = self.tokenizer(prompt, **kwargs)
        return output["input_ids"], output["attention_mask"]

    def decode(self, token_ids: "torch.LongTensor") -> List[str]:
        """Decode token ids back to text, dropping special tokens."""
        text = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        return text

    def convert_token_to_string(self, token: str) -> str:
        from transformers.file_utils import SPIECE_UNDERLINE

        string = self.tokenizer.convert_tokens_to_string([token])

        # Restore the leading space that SentencePiece-style tokenizers encode
        # with the SPIECE_UNDERLINE prefix (or the <0x20> byte token).
        if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
            return " " + string

        return string

    def __eq__(self, other):
        if isinstance(other, type(self)):
            if hasattr(self, "model_name") and hasattr(self, "kwargs"):
                return (
                    other.model_name == self.model_name and other.kwargs == self.kwargs
                )
            else:
                return other.tokenizer == self.tokenizer
        return NotImplemented

    def __hash__(self):
        from datasets.fingerprint import Hasher

        return hash(Hasher.hash(self.tokenizer))

    def __getstate__(self):
        # Only the wrapped tokenizer is needed to rebuild the object; every
        # other attribute is re-derived in `__setstate__` via `__init__`.
        state = {"tokenizer": self.tokenizer}
        return state

    def __setstate__(self, state):
        self.__init__(state["tokenizer"])

Transformers

Bases: Model

Thin wrapper around a transformers model and a transformers tokenizer.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and tokenizer.

Source code in outlines/models/transformers.py
class Transformers(Model):
    """Thin wrapper around a `transformers` model and a `transformers`
    tokenizer.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    tokenizer.

    """

    def __init__(
        self,
        model: "PreTrainedModel",
        tokenizer: "PreTrainedTokenizer",
        *,
        device_dtype: Optional["torch.dtype"] = None,
    ):
        """
        Parameters
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        tokenizer
            A `PreTrainedTokenizer`, or any tokenizer that is compatible with
            the `transformers` API for tokenizers.
        device_dtype
            The dtype to use for the model. If not provided, the model will use
            the default dtype.

        """
        # We need to handle the cases in which jax/flax or tensorflow
        # is not available in the environment.
        try:
            from transformers import FlaxPreTrainedModel
        except ImportError:  # pragma: no cover
            FlaxPreTrainedModel = None

        try:
            from transformers import TFPreTrainedModel
        except ImportError:  # pragma: no cover
            TFPreTrainedModel = None

        # Left padding so generated tokens directly follow the prompt in every
        # row of a padded batch — presumably required for decoder-only models.
        tokenizer.padding_side = "left"
        self.model = model
        self.hf_tokenizer = tokenizer
        self.tokenizer = TransformerTokenizer(tokenizer)
        self.device_dtype = device_dtype
        self.type_adapter = TransformersTypeAdapter(
            tokenizer=tokenizer,
            has_chat_template=_check_hf_chat_template(tokenizer)
        )

        if (
            FlaxPreTrainedModel is not None
            and isinstance(model, FlaxPreTrainedModel)
        ):  # pragma: no cover
            self.tensor_library_name = "jax"
            warnings.warn("""
                Support for `jax` has been deprecated and will be removed in
                version 1.4.0 of Outlines. Please use `torch` instead.
                Transformers models using `jax` do not support structured
                generation.
                """,
                DeprecationWarning,
                stacklevel=2,
            )
        elif (
            TFPreTrainedModel is not None
            and isinstance(model, TFPreTrainedModel)
        ):  # pragma: no cover
            self.tensor_library_name = "tensorflow"
            warnings.warn("""
                Support for `tensorflow` has been deprecated and will be removed in
                version 1.4.0 of Outlines. Please use `torch` instead.
                Transformers models using `tensorflow` do not support structured
                generation.
                """,
                DeprecationWarning,
                stacklevel=2,
            )
        else:
            self.tensor_library_name = "torch"

    def _prepare_model_inputs(
        self,
        model_input,
        is_batch: bool = False,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        # Format validation
        if is_batch:
            prompts = [
                self.type_adapter.format_input(item)
                for item in model_input
            ]
        else:
            prompts = self.type_adapter.format_input(model_input)
        input_ids, attention_mask = self.tokenizer.encode(prompts)
        # NOTE(review): only the attention mask — not the input ids — is cast
        # to `device_dtype`; presumably for models expecting a float mask.
        # Confirm this asymmetry is intentional.
        inputs = {
            "input_ids": input_ids.to(self.model.device),
            "attention_mask": (
                attention_mask.to(self.model.device, dtype=self.device_dtype)
                if self.device_dtype is not None
                else attention_mask.to(self.model.device)
            ),
        }

        return prompts, inputs

    def generate(
        self,
        model_input: Union[str, dict, Chat],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str]]:
        """Generate text using `transformers`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response. For
            multi-modal models, the input should be a dictionary containing the
            `text` key with a value of type `Union[str, List[str]]` and the
            other keys required by the model.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            of the `transformers` model.

        Returns
        -------
        Union[str, List[str]]
            The text generated by the model.

        """
        prompts, inputs = self._prepare_model_inputs(model_input, False)
        logits_processor = self.type_adapter.format_output_type(output_type)

        generated_ids = self._generate_output_seq(
            prompts,
            inputs,
            logits_processor=logits_processor,
            **inference_kwargs,
        )

        # required for multi-modal models that return a 2D tensor even when
        # num_return_sequences is 1
        num_samples = inference_kwargs.get("num_return_sequences", 1)
        if num_samples == 1 and len(generated_ids.shape) == 2:
            generated_ids = generated_ids.squeeze(0)

        return self._decode_generation(generated_ids)

    def generate_batch(
        self,
        model_input: List[Union[str, dict, Chat]],
        output_type: Optional[OutlinesLogitsProcessor] = None,
        **inference_kwargs: Any,
    ) -> List[Union[str, List[str]]]:
        """Generate text for a batch of prompts using `transformers`.

        Parameters
        ----------
        model_input
            A list of prompts based on which the model will generate a
            response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            of the `transformers` model.

        Returns
        -------
        List[Union[str, List[str]]]
            The texts generated by the model, one entry per prompt (a list of
            strings per prompt when `num_return_sequences` > 1).

        """
        prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore
        logits_processor = self.type_adapter.format_output_type(output_type)

        generated_ids = self._generate_output_seq(
            prompts, inputs, logits_processor=logits_processor, **inference_kwargs
        )

        # if there are multiple samples per input, convert generated_id to 3D
        num_samples = inference_kwargs.get("num_return_sequences", 1)
        if num_samples > 1:
            generated_ids = generated_ids.view(len(model_input), num_samples, -1)

        return self._decode_generation(generated_ids)

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `transformers` models.

        TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

        """
        raise NotImplementedError(
            "Streaming is not implemented for Transformers models."
        )

    def _generate_output_seq(self, prompts, inputs, **inference_kwargs):
        """Call the model's `generate` method and keep only generated tokens."""
        input_ids = inputs["input_ids"]

        output_ids = self.model.generate(
            **inputs,
            **inference_kwargs,
        )

        # encoder-decoder returns output_ids only, decoder-only returns full seq ids
        if self.model.config.is_encoder_decoder:
            generated_ids = output_ids
        else:
            generated_ids = output_ids[:, input_ids.shape[1] :]

        return generated_ids

    def _decode_generation(self, generated_ids: "torch.Tensor"):
        """Decode generated ids of rank 1, 2 or 3 into (nested) strings."""
        if len(generated_ids.shape) == 1:
            return self.tokenizer.decode([generated_ids])[0]
        elif len(generated_ids.shape) == 2:
            return self.tokenizer.decode(generated_ids)
        elif len(generated_ids.shape) == 3:
            return [
                self.tokenizer.decode(generated_ids[i])
                for i in range(len(generated_ids))
            ]
        else:  # pragma: no cover
            raise TypeError(
                "Generated outputs aren't 1D, 2D or 3D, but instead are "
                f"{generated_ids.shape}"
            )

__init__(model, tokenizer, *, device_dtype=None)

Parameters:

model: A PreTrainedModel, or any model that is compatible with the transformers API for models.
tokenizer: A PreTrainedTokenizer, or any tokenizer that is compatible with the transformers API for tokenizers.
device_dtype: The dtype to use for the model. If not provided, the model will use the default dtype.

Source code in outlines/models/transformers.py
def __init__(
    self,
    model: "PreTrainedModel",
    tokenizer: "PreTrainedTokenizer",
    *,
    device_dtype: Optional["torch.dtype"] = None,
):
    """
    Parameters
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    tokenizer
        A `PreTrainedTokenizer`, or any tokenizer that is compatible with
        the `transformers` API for tokenizers.
    device_dtype
        The dtype to use for the model. If not provided, the model will use
        the default dtype.

    """
    # We need to handle the cases in which jax/flax or tensorflow
    # is not available in the environment.
    try:
        from transformers import FlaxPreTrainedModel
    except ImportError:  # pragma: no cover
        FlaxPreTrainedModel = None

    try:
        from transformers import TFPreTrainedModel
    except ImportError:  # pragma: no cover
        TFPreTrainedModel = None

    # Left padding so generated tokens directly follow the prompt in every
    # row of a padded batch — presumably required for decoder-only models.
    tokenizer.padding_side = "left"
    self.model = model
    self.hf_tokenizer = tokenizer
    self.tokenizer = TransformerTokenizer(tokenizer)
    self.device_dtype = device_dtype
    self.type_adapter = TransformersTypeAdapter(
        tokenizer=tokenizer,
        has_chat_template=_check_hf_chat_template(tokenizer)
    )

    if (
        FlaxPreTrainedModel is not None
        and isinstance(model, FlaxPreTrainedModel)
    ):  # pragma: no cover
        self.tensor_library_name = "jax"
        warnings.warn("""
            Support for `jax` has been deprecated and will be removed in
            version 1.4.0 of Outlines. Please use `torch` instead.
            Transformers models using `jax` do not support structured
            generation.
            """,
            DeprecationWarning,
            stacklevel=2,
        )
    elif (
        TFPreTrainedModel is not None
        and isinstance(model, TFPreTrainedModel)
    ):  # pragma: no cover
        self.tensor_library_name = "tensorflow"
        warnings.warn("""
            Support for `tensorflow` has been deprecated and will be removed in
            version 1.4.0 of Outlines. Please use `torch` instead.
            Transformers models using `tensorflow` do not support structured
            generation.
            """,
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        self.tensor_library_name = "torch"

generate(model_input, output_type=None, **inference_kwargs)

Generate text using transformers.

Parameters:

Name Type Description Default
model_input Union[str, dict, Chat]

The prompt based on which the model will generate a response. For multi-modal models, the input should be a dictionary containing the text key with a value of type Union[str, List[str]] and the other keys required by the model.

required
output_type Optional[OutlinesLogitsProcessor]

The logits processor the model will use to constrain the format of the generated text.

None
inference_kwargs Any

Additional keyword arguments to pass to the generate method of the transformers model.

{}

Returns:

Type Description
Union[str, List[str]]

The text generated by the model.

Source code in outlines/models/transformers.py
def generate(
    self,
    model_input: Union[str, dict, Chat],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Run a single generation with a `transformers` model.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response. For
        multi-modal models, the input should be a dictionary containing the
        `text` key with a value of type `Union[str, List[str]]` and the
        other keys required by the model.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        of the `transformers` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.

    """
    prompts, inputs = self._prepare_model_inputs(model_input, False)
    processor = self.type_adapter.format_output_type(output_type)

    output_ids = self._generate_output_seq(
        prompts, inputs, logits_processor=processor, **inference_kwargs
    )

    # Multi-modal models may return a 2D tensor even for a single sample;
    # squeeze it so one generation decodes to a plain string.
    if (
        inference_kwargs.get("num_return_sequences", 1) == 1
        and len(output_ids.shape) == 2
    ):
        output_ids = output_ids.squeeze(0)

    return self._decode_generation(output_ids)

generate_batch(model_input, output_type=None, **inference_kwargs)

Source code in outlines/models/transformers.py
def generate_batch(
    self,
    model_input: List[Union[str, dict, Chat]],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> List[Union[str, List[str]]]:
    """Generate text for a batch of prompts using `transformers`.

    Parameters
    ----------
    model_input
        A list of prompts based on which the model will generate a
        response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        of the `transformers` model.

    Returns
    -------
    List[Union[str, List[str]]]
        The texts generated by the model, one entry per prompt (a list of
        strings per prompt when `num_return_sequences` > 1).

    """
    prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore
    logits_processor = self.type_adapter.format_output_type(output_type)

    generated_ids = self._generate_output_seq(
        prompts, inputs, logits_processor=logits_processor, **inference_kwargs
    )

    # if there are multiple samples per input, convert generated_id to 3D
    num_samples = inference_kwargs.get("num_return_sequences", 1)
    if num_samples > 1:
        generated_ids = generated_ids.view(len(model_input), num_samples, -1)

    return self._decode_generation(generated_ids)

generate_stream(model_input, output_type, **inference_kwargs)

Not available for transformers models.

TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

Source code in outlines/models/transformers.py
def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Streaming is not supported for `transformers` models.

    TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810

    """
    message = "Streaming is not implemented for Transformers models."
    raise NotImplementedError(message)

TransformersMultiModal

Bases: Transformers

Thin wrapper around a transformers model and a transformers processor.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the transformers model and processor.

Source code in outlines/models/transformers.py
class TransformersMultiModal(Transformers):
    """Thin wrapper around a `transformers` model and a `transformers`
    processor.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `transformers` model and
    processor.

    """

    def __init__(
        self,
        model: "PreTrainedModel",
        processor,
        *,
        device_dtype: Optional["torch.dtype"] = None,
    ):
        """Create a TransformersMultiModal model instance

        We rely on the `__init__` method of the `Transformers` class to handle
        most of the initialization and then add elements specific to multimodal
        models.

        Parameters
        ----------
        model
            A `PreTrainedModel`, or any model that is compatible with the
            `transformers` API for models.
        processor
            A `ProcessorMixin` instance.
        device_dtype
            The dtype to use for the model. If not provided, the model will use
            the default dtype.

        """
        self.processor = processor
        self.processor.padding_side = "left"
        # NOTE(review): unconditionally overwrites any pad token the processor
        # already defines — confirm "[PAD]" is safe for all processors used.
        self.processor.pad_token = "[PAD]"

        tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer

        super().__init__(model, tokenizer, device_dtype=device_dtype)

        # Replace the text-only adapter installed by the parent constructor
        # with the multimodal one.
        self.type_adapter = TransformersMultiModalTypeAdapter(
            tokenizer=tokenizer
        )

    def _prepare_model_inputs(
        self,
        model_input,
        is_batch: bool = False,
    ) -> Tuple[Union[str, List[str]], dict]:
        """Turn the user input into arguments to pass to the model"""
        if is_batch:
            prompts = [
                self.type_adapter.format_input(item) for item in model_input
            ]
        else:
            prompts = self.type_adapter.format_input(model_input)

        # The expected format is a single dict
        if is_batch:
            # Merge per-item dicts into one dict of lists: `text` gets one
            # entry per item, while other keys (presumably lists of media
            # assets) are concatenated.
            merged_prompts = defaultdict(list)
            for d in prompts:
                for key, value in d.items():
                    if key == "text":
                        merged_prompts[key].append(value)
                    else:
                        merged_prompts[key].extend(value)
        else:
            merged_prompts = prompts # type: ignore

        inputs = self.processor(
            **merged_prompts, padding=True, return_tensors="pt"
        )
        if self.device_dtype is not None:
            inputs = inputs.to(self.model.device, dtype=self.device_dtype)
        else:
            inputs = inputs.to(self.model.device)

        return merged_prompts["text"], inputs

__init__(model, processor, *, device_dtype=None)

Create a TransformersMultiModal model instance

We rely on the __init__ method of the Transformers class to handle most of the initialization and then add elements specific to multimodal models.

Parameters:

Name Type Description Default
model PreTrainedModel

A PreTrainedModel, or any model that is compatible with the transformers API for models.

required
processor

A ProcessorMixin instance.

required
device_dtype Optional[dtype]

The dtype to use for the model. If not provided, the model will use the default dtype.

None
Source code in outlines/models/transformers.py
def __init__(
    self,
    model: "PreTrainedModel",
    processor,
    *,
    device_dtype: Optional["torch.dtype"] = None,
):
    """Create a TransformersMultiModal model instance.

    Shared setup is delegated to `Transformers.__init__`; this method only
    adds the processor-specific configuration for multimodal models.

    Parameters
    ----------
    model
        A `PreTrainedModel`, or any model that is compatible with the
        `transformers` API for models.
    processor
        A `ProcessorMixin` instance.
    device_dtype
        The dtype to use for the model. If not provided, the model will use
        the default dtype.

    """
    # Configure the processor for left-padded batches before handing its
    # tokenizer to the parent class.
    processor.padding_side = "left"
    processor.pad_token = "[PAD]"
    self.processor = processor

    hf_tokenizer: "PreTrainedTokenizer" = processor.tokenizer

    super().__init__(model, hf_tokenizer, device_dtype=device_dtype)

    # Replace the text-only adapter installed by the parent constructor.
    self.type_adapter = TransformersMultiModalTypeAdapter(
        tokenizer=hf_tokenizer
    )

VLLM

Bases: Model

Thin wrapper around the openai.OpenAI client used to communicate with a vllm server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the vllm server.

Source code in outlines/models/vllm.py
class VLLM(Model):
    """Thin wrapper around the `openai.OpenAI` client used to communicate with
    a `vllm` server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    `vllm` server.
    """

    def __init__(
        self,
        client: "OpenAI",
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            An `openai.OpenAI` client instance.
        model_name
            The name of the model to use; serves as the default `model`
            argument when none is provided at inference time.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = VLLMTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input,
            output_type,
            **inference_kwargs,
        )

        response = self.client.chat.completions.create(**client_args)

        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The vLLM server refused to answer the request: "
                    f"{message.refusal}"
                )

        # A single choice is returned as a bare string for convenience.
        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Not available for the vLLM server integration."""
        raise NotImplementedError("VLLM does not support batch inference.")

    def generate_stream(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using vLLM.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = self.client.chat.completions.create(
            **client_args, stream=True,
        )

        for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the OpenAI client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        # Structured-output settings travel in `extra_body`, merged with any
        # caller-provided entries.
        extra_body = inference_kwargs.pop("extra_body", {})
        extra_body.update(output_type_args)

        # Fall back to the instance-level model name when the caller did not
        # pass one for this request.
        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }
        if extra_body:
            client_args["extra_body"] = extra_body

        return client_args

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client OpenAI

An openai.OpenAI client instance.

required
Source code in outlines/models/vllm.py
def __init__(
    self,
    client: "OpenAI",
    model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        An `openai.OpenAI` client instance.
    model_name
        The name of the model to use; serves as the default `model`
        argument when none is provided at inference time.

    """
    self.client = client
    self.model_name = model_name
    self.type_adapter = VLLMTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs)

Generate text using vLLM.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/vllm.py
def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Request a completion from a vLLM server.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs
    )
    response = self.client.chat.completions.create(**client_args)

    contents = []
    for choice in response.choices:
        message = choice.message
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The vLLM server refused to answer the request: "
                f"{message.refusal}"
            )
        contents.append(message.content)

    # A single choice is returned as a bare string for convenience.
    if len(contents) == 1:
        return contents[0]
    return contents

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using vLLM.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/vllm.py
def generate_stream(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream a completion from a vLLM server, chunk by chunk.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response_stream = self.client.chat.completions.create(
        stream=True, **client_args,
    )

    for chunk in response_stream:  # pragma: no cover
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta is not None:
            yield delta

VLLMOffline

Bases: Model

Thin wrapper around a vllm.LLM model.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the vllm.LLM model.

Source code in outlines/models/vllm_offline.py
class VLLMOffline(Model):
    """Thin wrapper around a `vllm.LLM` model.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `vllm.LLM` model.

    """

    def __init__(self, model: "LLM"):
        """Create a VLLM model instance.

        Parameters
        ----------
        model
            A `vllm.LLM` model instance.

        """
        self.model = model
        self.tokenizer = self.model.get_tokenizer()
        self.type_adapter = VLLMOfflineTypeAdapter(has_chat_template=self._check_chat_template())

    def _build_generation_args(
        self,
        inference_kwargs: dict,
        output_type: Optional[Any] = None,
    ) -> "SamplingParams":
        """Create the `SamplingParams` object to pass to the `generate` method
        of the `vllm.LLM` model."""
        from vllm.sampling_params import StructuredOutputsParams, SamplingParams

        sampling_params = inference_kwargs.pop("sampling_params", None)

        if sampling_params is None:
            sampling_params = SamplingParams()

        output_type_args = self.type_adapter.format_output_type(output_type)
        if output_type_args:
            # `SamplingParams` is rebuilt from its declared fields with
            # `structured_outputs` added rather than mutated in place.
            original_sampling_params_dict = {f: getattr(sampling_params, f) for f in sampling_params.__struct_fields__}
            sampling_params_dict = {**original_sampling_params_dict, "structured_outputs": StructuredOutputsParams(**output_type_args)}
            sampling_params = SamplingParams(**sampling_params_dict)

        return sampling_params

    def generate(
        self,
        model_input: Chat | str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, List[str]]:
        """Generate text using vLLM offline.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            in the `vllm.LLM` model.

        Returns
        -------
        Union[str, List[str]]
            The text generated by the model.

        """
        sampling_params = self._build_generation_args(
            inference_kwargs,
            output_type,
        )

        model_input = self.type_adapter.format_input(model_input)

        # A chat input is formatted as a list of messages; a plain prompt
        # stays a string.
        if isinstance(model_input, list):
            results = self.model.chat(
                messages=model_input,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        else:
            results = self.model.generate(
                prompts=model_input,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        # A single request was submitted, so only the first result's outputs
        # are relevant.
        results = [completion.text for completion in results[0].outputs]

        if len(results) == 1:
            return results[0]
        else:
            return results

    def generate_batch(
        self,
        model_input: List[Chat | str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[List[str], List[List[str]]]:
        """Generate a batch of completions using vLLM offline.

        Parameters
        ----------
        model_input
            The list of prompts based on which the model will generate a
            response.
        output_type
            The logits processor the model will use to constrain the format of
            the generated text.
        inference_kwargs
            Additional keyword arguments to pass to the `generate` method
            in the `vllm.LLM` model.

        Returns
        -------
        Union[List[str], List[List[str]]]
            The text generated by the model.

        """
        sampling_params = self._build_generation_args(
            inference_kwargs,
            output_type,
        )

        model_inputs = [self.type_adapter.format_input(item) for item in model_input]

        if model_inputs and isinstance(model_inputs[0], list):
            results = self.model.chat(
                messages=model_inputs,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        else:
            results = self.model.generate(
                prompts=model_inputs,
                sampling_params=sampling_params,
                **inference_kwargs,
            )
        return [[sample.text for sample in batch.outputs] for batch in results]

    def generate_stream(self, model_input, output_type, **inference_kwargs):
        """Not available for `vllm.LLM`.

        TODO: Implement the streaming functionality ourselves.

        """
        raise NotImplementedError(
            "Streaming is not available for the vLLM offline integration."
        )

    def _check_chat_template(self) -> bool:
        """Check if the tokenizer has a chat template."""
        from vllm.transformers_utils.tokenizer import (
            PreTrainedTokenizer,
            PreTrainedTokenizerFast,
            TokenizerBase
        )
        from outlines.models.tokenizer import _check_hf_chat_template

        if isinstance(self.tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
            return _check_hf_chat_template(self.tokenizer)
        elif isinstance(self.tokenizer, TokenizerBase):
            # vLLM defines its own TokenizerBase class, and only provides
            # limited compatibility with HuggingFace tokenizers. So we
            # need to check for chat template support differently.
            try:
                self.tokenizer.apply_chat_template([{"role": "user", "content": "test"}])
                return True
            except Exception:
                return False
        else:  # Never reached  # pragma: no cover
            return False

__init__(model)

Create a VLLM model instance.

Parameters:

Name Type Description Default
model LLM

A vllm.LLM model instance.

required
Source code in outlines/models/vllm_offline.py
def __init__(self, model: "LLM"):
    """Create a VLLM model instance.

    Parameters
    ----------
    model
        A `vllm.LLM` model instance.

    """
    self.model = model
    # The tokenizer is needed to detect chat-template support below.
    self.tokenizer = model.get_tokenizer()
    self.type_adapter = VLLMOfflineTypeAdapter(
        has_chat_template=self._check_chat_template()
    )

generate(model_input, output_type=None, **inference_kwargs)

Generate text using vLLM offline.

Parameters:

Name Type Description Default
model_input

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The logits processor the model will use to constrain the format of the generated text.

None
inference_kwargs Any

Additional keyword arguments to pass to the generate method in the vllm.LLM model.

{}

Returns:

Type Description
Union[str, List[str]]

The text generated by the model.

Source code in outlines/models/vllm_offline.py
def generate(
    self,
    model_input: Chat | str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Generate text using vLLM offline.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.

    """
    sampling_params = self._build_generation_args(
        inference_kwargs,
        output_type,
    )

    model_input = self.type_adapter.format_input(model_input)

    # Chat inputs are formatted as a list of messages and must go through
    # the chat endpoint; plain string prompts use the completion endpoint.
    if isinstance(model_input, list):
        results = self.model.chat(
            messages=model_input,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    else:
        results = self.model.generate(
            prompts=model_input,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    # A single prompt yields a single request output; unwrap its samples.
    results = [completion.text for completion in results[0].outputs]

    # Return a bare string when only one sample was generated.
    if len(results) == 1:
        return results[0]
    else:
        return results

generate_batch(model_input, output_type=None, **inference_kwargs)

Generate a batch of completions using vLLM offline.

Parameters:

Name Type Description Default
model_input

The list of prompts based on which the model will generate a response.

required
output_type Optional[Any]

The logits processor the model will use to constrain the format of the generated text.

None
inference_kwargs Any

Additional keyword arguments to pass to the generate method in the vllm.LLM model.

{}

Returns:

Type Description
Union[List[str], List[List[str]]]

The text generated by the model.

Source code in outlines/models/vllm_offline.py
def generate_batch(
    self,
    model_input: List[Chat | str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[List[str], List[List[str]]]:
    """Generate a batch of completions using vLLM offline.

    Parameters
    ----------
    model_input
        The list of prompts based on which the model will generate a
        response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[List[str], List[List[str]]]
        The text generated by the model.

    """
    sampling_params = self._build_generation_args(
        inference_kwargs,
        output_type,
    )

    model_inputs = [self.type_adapter.format_input(item) for item in model_input]

    # Chat inputs are formatted as lists of messages and must go through
    # the chat endpoint; plain string prompts use the completion endpoint.
    if model_inputs and isinstance(model_inputs[0], list):
        results = self.model.chat(
            messages=model_inputs,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    else:
        results = self.model.generate(
            prompts=model_inputs,
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    return [[sample.text for sample in batch.outputs] for batch in results]

generate_stream(model_input, output_type, **inference_kwargs)

Not available for vllm.LLM.

TODO: Implement the streaming functionality ourselves.

Source code in outlines/models/vllm_offline.py
def generate_stream(self, model_input, output_type, **inference_kwargs):
    """Raise an error: streaming is not supported for `vllm.LLM`.

    TODO: Implement the streaming functionality ourselves.

    """
    raise NotImplementedError(
        "Streaming is not available for the vLLM offline integration."
    )

from_anthropic(client, model_name=None)

Create an Outlines Anthropic model instance from an anthropic.Anthropic client instance.

Parameters:

Name Type Description Default
client Anthropic

An anthropic.Anthropic client instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Anthropic

An Outlines Anthropic model instance.

Source code in outlines/models/anthropic.py
def from_anthropic(
    client: "AnthropicClient", model_name: Optional[str] = None
) -> Anthropic:
    """Wrap an `anthropic.Anthropic` client in an Outlines `Anthropic` model.

    Parameters
    ----------
    client
        An `anthropic.Anthropic` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Anthropic
        An Outlines `Anthropic` model instance.

    """
    return Anthropic(client, model_name)

from_dottxt(client, model_name=None, model_revision=None)

Create an Outlines Dottxt model instance from a dottxt.Dottxt client instance.

Parameters:

Name Type Description Default
client Dottxt

A dottxt.Dottxt client instance.

required
model_name Optional[str]

The name of the model to use.

None
model_revision Optional[str]

The revision of the model to use.

None

Returns:

Type Description
Dottxt

An Outlines Dottxt model instance.

Source code in outlines/models/dottxt.py
def from_dottxt(
    client: "DottxtClient",
    model_name: Optional[str] = None,
    model_revision: Optional[str] = None,
) -> Dottxt:
    """Wrap a `dottxt.Dottxt` client in an Outlines `Dottxt` model.

    Parameters
    ----------
    client
        A `dottxt.Dottxt` client instance.
    model_name
        The name of the model to use.
    model_revision
        The revision of the model to use.

    Returns
    -------
    Dottxt
        An Outlines `Dottxt` model instance.

    """
    return Dottxt(client, model_name, model_revision)

from_gemini(client, model_name=None)

Create an Outlines Gemini model instance from a google.genai.Client instance.

Parameters:

Name Type Description Default
client Client

A google.genai.Client instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Gemini

An Outlines Gemini model instance.

Source code in outlines/models/gemini.py
def from_gemini(client: "Client", model_name: Optional[str] = None) -> Gemini:
    """Wrap a `google.genai.Client` in an Outlines `Gemini` model.

    Parameters
    ----------
    client
        A `google.genai.Client` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Gemini
        An Outlines `Gemini` model instance.

    """
    return Gemini(client, model_name)

from_llamacpp(model, chat_mode=True)

Create an Outlines LlamaCpp model instance from a llama_cpp.Llama instance.

Parameters:

Name Type Description Default
model Llama

A llama_cpp.Llama instance.

required
chat_mode bool

Whether to enable chat mode. If False, the model will regard all str inputs as plain text prompts. If True, the model will regard all str inputs as user messages in a chat conversation.

True

Returns:

Type Description
LlamaCpp

An Outlines LlamaCpp model instance.

Source code in outlines/models/llamacpp.py
def from_llamacpp(model: "Llama", chat_mode: bool = True) -> LlamaCpp:
    """Wrap a `llama_cpp.Llama` instance in an Outlines `LlamaCpp` model.

    Parameters
    ----------
    model
        A `llama_cpp.Llama` instance.
    chat_mode
        Whether to enable chat mode. If `False`, every `str` input is
        treated as a plain text prompt. If `True`, every `str` input is
        treated as a user message in a chat conversation.

    Returns
    -------
    LlamaCpp
        An Outlines `LlamaCpp` model instance.

    """
    return LlamaCpp(model, chat_mode=chat_mode)

from_lmstudio(client, model_name=None)

Create an Outlines LMStudio model instance from a lmstudio.Client or lmstudio.AsyncClient instance.

Parameters:

Name Type Description Default
client Union[Client, AsyncClient]

A lmstudio.Client or lmstudio.AsyncClient instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Union[LMStudio, AsyncLMStudio]

An Outlines LMStudio or AsyncLMStudio model instance.

Source code in outlines/models/lmstudio.py
def from_lmstudio(
    client: Union["Client", "AsyncClient"],
    model_name: Optional[str] = None,
) -> Union[LMStudio, AsyncLMStudio]:
    """Wrap a `lmstudio.Client` or `lmstudio.AsyncClient` in an Outlines
    `LMStudio` or `AsyncLMStudio` model.

    Parameters
    ----------
    client
        A `lmstudio.Client` or `lmstudio.AsyncClient` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[LMStudio, AsyncLMStudio]
        An Outlines `LMStudio` or `AsyncLMStudio` model instance.

    """
    from lmstudio import AsyncClient, Client

    # The sync/async choice is driven entirely by the client's type.
    if isinstance(client, Client):
        return LMStudio(client, model_name)
    if isinstance(client, AsyncClient):
        return AsyncLMStudio(client, model_name)
    raise ValueError(
        "Invalid client type, the client must be an instance of "
        "`lmstudio.Client` or `lmstudio.AsyncClient`."
    )

from_mistral(client, model_name=None, async_client=False)

Create an Outlines Mistral model instance from a mistralai.Mistral client.

Parameters:

Name Type Description Default
client Mistral

A mistralai.Mistral client instance.

required
model_name Optional[str]

The name of the model to use.

None
async_client bool

If True, return an AsyncMistral instance; otherwise, return a Mistral instance.

False

Returns:

Type Description
Union[Mistral, AsyncMistral]

An Outlines Mistral or AsyncMistral model instance.

Source code in outlines/models/mistral.py
def from_mistral(
    client: "MistralClient",
    model_name: Optional[str] = None,
    async_client: bool = False,
) -> Union[Mistral, AsyncMistral]:
    """Create an Outlines `Mistral` or `AsyncMistral` model instance from a
    `mistralai.Mistral` client instance.

    Parameters
    ----------
    client
        A `mistralai.Mistral` client instance.
    model_name
        The name of the model to use.
    async_client
        If `True`, return an `AsyncMistral` instance; otherwise, return a
        `Mistral` instance.

    Returns
    -------
    Union[Mistral, AsyncMistral]
        An Outlines `Mistral` or `AsyncMistral` model instance.

    """
    from mistralai import Mistral as MistralClient

    # Unlike the other wrappers, the sync/async choice is driven by a flag
    # rather than the client type, so validate the client eagerly.
    if not isinstance(client, MistralClient):
        raise ValueError(
            "Invalid client type. The client must be an instance of "
            "`mistralai.Mistral`."
        )

    if async_client:
        return AsyncMistral(client, model_name)
    else:
        return Mistral(client, model_name)

from_mlxlm(model, tokenizer)

Create an Outlines MLXLM model instance from an mlx_lm model and a tokenizer.

Parameters:

Name Type Description Default
model Module

An instance of an mlx_lm model.

required
tokenizer PreTrainedTokenizer

An instance of an mlx_lm tokenizer or of a compatible transformers tokenizer.

required

Returns:

Type Description
MLXLM

An Outlines MLXLM model instance.

Source code in outlines/models/mlxlm.py
def from_mlxlm(model: "nn.Module", tokenizer: "PreTrainedTokenizer") -> MLXLM:
    """Wrap an `mlx_lm` model and tokenizer in an Outlines `MLXLM` model.

    Parameters
    ----------
    model
        An instance of an `mlx_lm` model.
    tokenizer
        An instance of an `mlx_lm` tokenizer or of a compatible
        transformers tokenizer.

    Returns
    -------
    MLXLM
        An Outlines `MLXLM` model instance.

    """
    return MLXLM(model, tokenizer)

from_ollama(client, model_name=None)

Create an Outlines Ollama model instance from an ollama.Client or ollama.AsyncClient instance.

Parameters:

Name Type Description Default
client Union[Client, AsyncClient]

A ollama.Client or ollama.AsyncClient instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Union[Ollama, AsyncOllama]

An Outlines Ollama or AsyncOllama model instance.

Source code in outlines/models/ollama.py
def from_ollama(
    client: Union["Client", "AsyncClient"], model_name: Optional[str] = None
) -> Union[Ollama, AsyncOllama]:
    """Wrap an `ollama.Client` or `ollama.AsyncClient` in an Outlines
    `Ollama` or `AsyncOllama` model.

    Parameters
    ----------
    client
        A `ollama.Client` or `ollama.AsyncClient` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[Ollama, AsyncOllama]
        An Outlines `Ollama` or `AsyncOllama` model instance.

    """
    from ollama import AsyncClient, Client

    # The sync/async choice is driven entirely by the client's type.
    if isinstance(client, Client):
        return Ollama(client, model_name)
    if isinstance(client, AsyncClient):
        return AsyncOllama(client, model_name)
    raise ValueError(
        "Invalid client type, the client must be an instance of "
        "`ollama.Client` or `ollama.AsyncClient`."
    )

from_openai(client, model_name=None)

Create an Outlines OpenAI or AsyncOpenAI model instance from an openai.OpenAI or openai.AsyncOpenAI client.

Parameters:

Name Type Description Default
client Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]

An openai.OpenAI, openai.AsyncOpenAI, openai.AzureOpenAI or openai.AsyncAzureOpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
OpenAI

An Outlines OpenAI or AsyncOpenAI model instance.

Source code in outlines/models/openai.py
def from_openai(
    client: Union[
        "OpenAIClient",
        "AsyncOpenAIClient",
        "AzureOpenAIClient",
        "AsyncAzureOpenAIClient",
    ],
    model_name: Optional[str] = None,
) -> Union[OpenAI, AsyncOpenAI]:
    """Create an Outlines `OpenAI` or `AsyncOpenAI` model instance from an
    `openai.OpenAI` or `openai.AsyncOpenAI` client.

    Parameters
    ----------
    client
        An `openai.OpenAI`, `openai.AsyncOpenAI`, `openai.AzureOpenAI` or
        `openai.AsyncAzureOpenAI` client instance.
    model_name
        The name of the model to use.

    Returns
    -------
    OpenAI
        An Outlines `OpenAI` or `AsyncOpenAI` model instance.

    """
    import openai

    # Azure clients are subclasses of the plain OpenAI clients, so these
    # two isinstance checks cover all four accepted client types.
    if isinstance(client, openai.OpenAI):
        return OpenAI(client, model_name)
    elif isinstance(client, openai.AsyncOpenAI):
        return AsyncOpenAI(client, model_name)
    else:
        # Fixed: a stray "+ " was previously included inside the string
        # literal (a concatenation operator mistakenly placed in-quotes),
        # garbling the error message shown to users.
        raise ValueError(
            "Invalid client type. The client must be an instance of "
            "`openai.OpenAI` or `openai.AsyncOpenAI`."
        )

from_sglang(client, model_name=None)

Create a SGLang or AsyncSGLang instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name Type Description Default
client Union[OpenAI, AsyncOpenAI]

An openai.OpenAI or openai.AsyncOpenAI instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Union[SGLang, AsyncSGLang]

An Outlines SGLang or AsyncSGLang model instance.

Source code in outlines/models/sglang.py
def from_sglang(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[SGLang, AsyncSGLang]:
    """Wrap an `openai.OpenAI` or `openai.AsyncOpenAI` instance in an
    Outlines `SGLang` or `AsyncSGLang` model.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[SGLang, AsyncSGLang]
        An Outlines `SGLang` or `AsyncSGLang` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    # The sync/async choice is driven entirely by the client's type.
    if isinstance(client, OpenAI):
        return SGLang(client, model_name)
    if isinstance(client, AsyncOpenAI):
        return AsyncSGLang(client, model_name)
    raise ValueError(
        f"Unsupported client type: {type(client)}.\n"
        "Please provide an OpenAI or AsyncOpenAI instance."
    )

from_tgi(client)

Create an Outlines TGI or AsyncTGI model instance from a huggingface_hub.InferenceClient or huggingface_hub.AsyncInferenceClient instance.

Parameters:

Name Type Description Default
client Union[InferenceClient, AsyncInferenceClient]

An huggingface_hub.InferenceClient or huggingface_hub.AsyncInferenceClient instance.

required

Returns:

Type Description
Union[TGI, AsyncTGI]

An Outlines TGI or AsyncTGI model instance.

Source code in outlines/models/tgi.py
def from_tgi(
    client: Union["InferenceClient", "AsyncInferenceClient"],
) -> Union[TGI, AsyncTGI]:
    """Wrap a `huggingface_hub.InferenceClient` or
    `huggingface_hub.AsyncInferenceClient` in an Outlines `TGI` or
    `AsyncTGI` model.

    Parameters
    ----------
    client
        A `huggingface_hub.InferenceClient` or
        `huggingface_hub.AsyncInferenceClient` instance.

    Returns
    -------
    Union[TGI, AsyncTGI]
        An Outlines `TGI` or `AsyncTGI` model instance.

    """
    from huggingface_hub import AsyncInferenceClient, InferenceClient

    # The sync/async choice is driven entirely by the client's type.
    if isinstance(client, InferenceClient):
        return TGI(client)
    if isinstance(client, AsyncInferenceClient):
        return AsyncTGI(client)
    raise ValueError(
        f"Unsupported client type: {type(client)}.\n"
        + "Please provide an HuggingFace InferenceClient "
        + "or AsyncInferenceClient instance."
    )

from_transformers(model, tokenizer_or_processor, *, device_dtype=None)

Create an Outlines Transformers or TransformersMultiModal model instance from a PreTrainedModel instance and a PreTrainedTokenizer or ProcessorMixin instance.

outlines supports PreTrainedModelForCausalLM, PreTrainedMambaForCausalLM, PreTrainedModelForSeq2Seq and any model that implements the transformers model API.

Parameters:

Name Type Description Default
model PreTrainedModel

A transformers.PreTrainedModel instance.

required
tokenizer_or_processor Union[PreTrainedTokenizer, ProcessorMixin]

A transformers.PreTrainedTokenizer or transformers.ProcessorMixin instance.

required
device_dtype Optional[dtype]

The dtype to use for the model. If not provided, the model will use the default dtype.

None

Returns:

Type Description
Union[Transformers, TransformersMultiModal]

An Outlines Transformers or TransformersMultiModal model instance.

Source code in outlines/models/transformers.py
def from_transformers(
    model: "PreTrainedModel",
    tokenizer_or_processor: Union["PreTrainedTokenizer", "ProcessorMixin"],
    *,
    device_dtype: Optional["torch.dtype"] = None,
) -> Union[Transformers, TransformersMultiModal]:
    """Create an Outlines `Transformers` or `TransformersMultiModal` model
    instance from a `PreTrainedModel` instance and a `PreTrainedTokenizer` or
    `ProcessorMixin` instance.

    `outlines` supports `PreTrainedModelForCausalLM`,
    `PreTrainedMambaForCausalLM`, `PreTrainedModelForSeq2Seq` and any model
    that implements the `transformers` model API.

    Parameters
    ----------
    model
        A `transformers.PreTrainedModel` instance.
    tokenizer_or_processor
        A `transformers.PreTrainedTokenizer` or
        `transformers.ProcessorMixin` instance.
    device_dtype
        The dtype to use for the model. If not provided, the model will use
        the default dtype.

    Returns
    -------
    Union[Transformers, TransformersMultiModal]
        An Outlines `Transformers` or `TransformersMultiModal` model instance.

    """
    from transformers import (
        PreTrainedTokenizer, PreTrainedTokenizerFast, ProcessorMixin)

    # A tokenizer means a text-only model; a processor means a multi-modal
    # model. Anything else is ambiguous and rejected below.
    if isinstance(
        tokenizer_or_processor, (PreTrainedTokenizer, PreTrainedTokenizerFast)
    ):
        tokenizer = tokenizer_or_processor
        return Transformers(model, tokenizer, device_dtype=device_dtype)
    elif isinstance(tokenizer_or_processor, ProcessorMixin):
        processor = tokenizer_or_processor
        return TransformersMultiModal(model, processor, device_dtype=device_dtype)
    else:
        # Fixed: the message previously read "We could determine" (missing
        # the "not") and doubled the article ("provide a ... a transformers").
        raise ValueError(
            "We could not determine whether the model passed to "
            + "`from_transformers` is a text-2-text or a multi-modal model. "
            + "Please provide a transformers tokenizer or processor."
        )

from_vllm(client, model_name=None)

Create an Outlines VLLM or AsyncVLLM model instance from an openai.OpenAI or openai.AsyncOpenAI instance.

Parameters:

Name Type Description Default
client Union[OpenAI, AsyncOpenAI]

An openai.OpenAI or openai.AsyncOpenAI instance.

required
model_name Optional[str]

The name of the model to use.

None

Returns:

Type Description
Union[VLLM, AsyncVLLM]

An Outlines VLLM or AsyncVLLM model instance.

Source code in outlines/models/vllm.py
def from_vllm(
    client: Union["OpenAI", "AsyncOpenAI"],
    model_name: Optional[str] = None,
) -> Union[VLLM, AsyncVLLM]:
    """Wrap an `openai.OpenAI` or `openai.AsyncOpenAI` instance in an
    Outlines `VLLM` or `AsyncVLLM` model.

    Parameters
    ----------
    client
        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.
    model_name
        The name of the model to use.

    Returns
    -------
    Union[VLLM, AsyncVLLM]
        An Outlines `VLLM` or `AsyncVLLM` model instance.

    """
    from openai import AsyncOpenAI, OpenAI

    # The sync/async choice is driven entirely by the client's type.
    if isinstance(client, OpenAI):
        return VLLM(client, model_name)
    if isinstance(client, AsyncOpenAI):
        return AsyncVLLM(client, model_name)
    raise ValueError(
        f"Unsupported client type: {type(client)}.\n"
        "Please provide an OpenAI or AsyncOpenAI instance."
    )

from_vllm_offline(model)

Create an Outlines VLLMOffline model instance from a vllm.LLM instance.

Parameters:

Name Type Description Default
model LLM

A vllm.LLM instance.

required

Returns:

Type Description
VLLMOffline

An Outlines VLLMOffline model instance.

Source code in outlines/models/vllm_offline.py
def from_vllm_offline(model: "LLM") -> VLLMOffline:
    """Wrap a `vllm.LLM` instance in an Outlines `VLLMOffline` model.

    Parameters
    ----------
    model
        A `vllm.LLM` instance.

    Returns
    -------
    VLLMOffline
        An Outlines `VLLMOffline` model instance.

    """
    return VLLMOffline(model)

applications

Encapsulate a prompt template and an output type into a reusable object.

Application

Application is a class that encapsulates a prompt template and an output type. It can be called to generate a response by providing a model, the values to be substituted in the template in a dictionary and optional inference parameters.

Parameters:

Name Type Description Default
template Union[Template, Callable]

A callable that takes arguments and returns a prompt string.

required
output_type Any

The expected output type of the generated response.

None

Examples:

from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from outlines import models, Application
from outlines.types import JsonType
from outlines.templates import Template

class OutputModel(BaseModel):
    result: int

model = models.from_transformers(
    AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"),
    AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
)

template_string = "What is 2 times {{ num }}?"
template = Template.from_string(template_string)

application = Application(template, JsonType(OutputModel))

result = application(model, {"num": 3}, max_new_tokens=20)
print(result)  # Expected output: { "result" : 6 }
Source code in outlines/applications.py
class Application:
    """Encapsulate a prompt template and an output type into a callable.

    Calling an `Application` with a model, a dictionary of values to
    substitute in the template, and optional inference parameters renders
    the prompt and generates a response constrained by the output type.

    Parameters
    ----------
    template : Union[Template, Callable]
        A callable that takes arguments and returns a prompt string.
    output_type : Any
        The expected output type of the generated response.

    Examples
    --------
    ```python
    from pydantic import BaseModel
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from outlines import models, Application
    from outlines.types import JsonType
    from outlines.templates import Template

    class OutputModel(BaseModel):
        result: int

    model = models.from_transformers(
        AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"),
        AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
    )

    template_string = "What is 2 times {{ num }}?"
    template = Template.from_string(template_string)

    application = Application(template, JsonType(OutputModel))

    result = application(model, {"num": 3}, max_new_tokens=20)
    print(result)  # Expected output: { "result" : 6 }
    ```

    """
    def __init__(
        self,
        template: Union[Template, Callable],
        output_type: Optional[Any] = None,
    ):
        """
        Parameters
        ----------
        template
            The template to use to build the prompt.
        output_type
            The output type provided to the generator.

        """
        self.template = template
        self.output_type = output_type
        # The generator is created lazily on the first call and cached.
        self.generator: Optional[Union[
            BlackBoxGenerator, SteerableGenerator
        ]] = None
        self.model: Optional[Model] = None

    def __call__(
        self,
        model: Model,
        template_vars: Dict[str, Any],
        **inference_kwargs
    ) -> Any:
        """Render the template and generate a response with `model`.

        Parameters
        ----------
        model
            The model to use to generate the response.
        template_vars
            The variables to be substituted in the template.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The generated response.

        """
        if model is None:
            raise ValueError("you must provide a model")
        # Reuse the cached generator unless the model changed since the
        # previous call.
        if model != self.model:
            self.model = model
            self.generator = Generator(model, self.output_type)  # type: ignore

        rendered_prompt = self.template(**template_vars)
        assert self.generator is not None
        return self.generator(rendered_prompt, **inference_kwargs)

__call__(model, template_vars, **inference_kwargs)

Parameters:

Name Type Description Default
model Model

The model to use to generate the response.

required
template_vars Dict[str, Any]

The variables to be substituted in the template.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Any

The generated response.

Source code in outlines/applications.py
def __call__(
    self,
    model: Model,
    template_vars: Dict[str, Any],
    **inference_kwargs
) -> Any:
    """Render the template and generate a response with `model`.

    Parameters
    ----------
    model
        The model to use to generate the response.
    template_vars
        The variables to be substituted in the template.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The generated response.

    """
    if model is None:
        raise ValueError("you must provide a model")
    # Reuse the cached generator unless the model changed since the
    # previous call.
    if model != self.model:
        self.model = model
        self.generator = Generator(model, self.output_type)  # type: ignore

    rendered_prompt = self.template(**template_vars)
    assert self.generator is not None
    return self.generator(rendered_prompt, **inference_kwargs)

__init__(template, output_type=None)

Parameters:

Name Type Description Default
template Union[Template, Callable]

The template to use to build the prompt.

required
output_type Optional[Any]

The output type provided to the generator.

None
Source code in outlines/applications.py
def __init__(
    self,
    template: Union[Template, Callable],
    output_type: Optional[Any] = None,
):
    """
    Parameters
    ----------
    template
        The template to use to build the prompt.
    output_type
        The output type provided to the generator.

    """
    self.template = template
    self.output_type = output_type
    # The generator is created lazily on the first call and cached.
    self.generator: Optional[Union[
        BlackBoxGenerator, SteerableGenerator
    ]] = None
    self.model: Optional[Model] = None

backends

Module to define the backends in charge of creating logits processors.

BaseBackend

Bases: ABC

Base class for all backends.

The subclasses must implement methods that create a logits processor from a JSON schema, regex or CFG.

Source code in outlines/backends/base.py
class BaseBackend(ABC):
    """Abstract base class for all backends.

    Subclasses must be able to build a logits processor from each of the
    three supported constraint formats: JSON schema, regex, and CFG.

    """

    @abstractmethod
    def get_json_schema_logits_processor(
        self, json_schema: str
    ) -> LogitsProcessorType:
        """Build a logits processor from a JSON schema.

        Parameters
        ----------
        json_schema: str
            The JSON schema to create a logits processor from.

        Returns
        -------
        LogitsProcessorType
            The logits processor.

        """
        ...

    @abstractmethod
    def get_regex_logits_processor(self, regex: str) -> LogitsProcessorType:
        """Build a logits processor from a regex.

        Parameters
        ----------
        regex: str
            The regex to create a logits processor from.

        Returns
        -------
        LogitsProcessorType
            The logits processor.

        """
        ...

    @abstractmethod
    def get_cfg_logits_processor(self, grammar: str) -> LogitsProcessorType:
        """Build a logits processor from a context-free grammar.

        Parameters
        ----------
        grammar: str
            The context-free grammar to create a logits processor from.

        Returns
        -------
        LogitsProcessorType
            The logits processor.

        """
        ...

get_cfg_logits_processor(grammar) abstractmethod

Create a logits processor from a context-free grammar.

Parameters:

Name Type Description Default
grammar str

The context-free grammar to create a logits processor from.

required

Returns:

Type Description
LogitsProcessorType

The logits processor.

Source code in outlines/backends/base.py
@abstractmethod
def get_cfg_logits_processor(self, grammar: str) -> LogitsProcessorType:
    """Build a logits processor from a context-free grammar.

    Parameters
    ----------
    grammar: str
        The context-free grammar to create a logits processor from.

    Returns
    -------
    LogitsProcessorType
        The logits processor.

    """
    ...

get_json_schema_logits_processor(json_schema) abstractmethod

Create a logits processor from a JSON schema.

Parameters:

Name Type Description Default
json_schema str

The JSON schema to create a logits processor from.

required

Returns:

Type Description
LogitsProcessorType

The logits processor.

Source code in outlines/backends/base.py
@abstractmethod
def get_json_schema_logits_processor(
    self, json_schema: str
) -> LogitsProcessorType:
    """Create a logits processor from a JSON schema.

    Parameters
    ----------
    json_schema: str
        The JSON schema to create a logits processor from.

    Returns
    -------
    LogitsProcessorType
        The logits processor.

    """
    # Abstract: each concrete backend provides the implementation.
    ...

get_regex_logits_processor(regex) abstractmethod

Create a logits processor from a regex.

Parameters:

Name Type Description Default
regex str

The regex to create a logits processor from.

required

Returns:

Type Description
LogitsProcessorType

The logits processor.

Source code in outlines/backends/base.py
@abstractmethod
def get_regex_logits_processor(self, regex: str) -> LogitsProcessorType:
    """Create a logits processor from a regex.

    Parameters
    ----------
    regex: str
        The regex to create a logits processor from.

    Returns
    -------
    LogitsProcessorType
        The logits processor.

    """
    # Abstract: each concrete backend provides the implementation.
    ...

LLGuidanceBackend

Bases: BaseBackend

Backend for LLGuidance.

Source code in outlines/backends/llguidance.py
class LLGuidanceBackend(BaseBackend):
    """Backend for LLGuidance."""

    def __init__(self, model: SteerableModel):
        """
        Parameters
        ----------
        model
            The Outlines model of the user.

        """
        # Deferred import: llguidance is an optional dependency.
        import llguidance as llg

        self.llg = llg
        self.llg_tokenizer = self._create_llg_tokenizer(model)
        self.tensor_library_name = model.tensor_library_name

    def _create_llg_tokenizer(self, model: SteerableModel) -> "LLGTokenizer":
        """Build the llguidance tokenizer matching the model's tokenizer.

        Parameters
        ----------
        model: Model
            The Outlines model.

        Returns
        -------
        LLGTokenizer
            The llg tokenizer.

        """
        if isinstance(model, Transformers):
            import llguidance.hf

            return llguidance.hf.from_tokenizer(model.hf_tokenizer)

        if isinstance(model, LlamaCpp):
            import llama_cpp
            import llguidance.llamacpp

            raw_vocab = llama_cpp.llama_model_get_vocab(model.model.model)
            return llguidance.llamacpp.lltokenizer_from_vocab(raw_vocab)

        if isinstance(model, MLXLM):  # pragma: no cover
            import llguidance.hf

            return llguidance.hf.from_tokenizer(
                model.mlx_tokenizer._tokenizer
            )

        raise ValueError(  # pragma: no cover
            f"Unsupported model type: {type(model)}. "
            "Llguidance only supports LlamaCpp, MLXLM "
            "and Transformers models."
        )

    def get_json_schema_logits_processor(
        self, json_schema: str
    ) -> LLGuidanceLogitsProcessor:
        """Create a logits processor from a JSON schema.

        Parameters
        ----------
        json_schema: str
            The JSON schema to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        spec = self.llg.grammar_from("json_schema", json_schema)
        return LLGuidanceLogitsProcessor(
            spec, self.llg_tokenizer, self.tensor_library_name
        )

    def get_regex_logits_processor(
        self, regex: str
    ) -> LLGuidanceLogitsProcessor:
        """Create a logits processor from a regex.

        Parameters
        ----------
        regex: str
            The regex to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        spec = self.llg.grammar_from("regex", regex)
        return LLGuidanceLogitsProcessor(
            spec, self.llg_tokenizer, self.tensor_library_name
        )

    def get_cfg_logits_processor(
        self, grammar: str
    ) -> LLGuidanceLogitsProcessor:
        """Create a logits processor from a context-free grammar.

        Parameters
        ----------
        grammar: str
            The context-free grammar to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        # Try the "grammar" format first; fall back to "lark" when
        # llguidance rejects the spec with a ValueError.
        try:
            spec = self.llg.grammar_from("grammar", grammar)
        except ValueError:
            spec = self.llg.grammar_from("lark", grammar)
        return LLGuidanceLogitsProcessor(
            spec, self.llg_tokenizer, self.tensor_library_name
        )

__init__(model)

Parameters:

Name Type Description Default
model SteerableModel

The Outlines model of the user.

required
Source code in outlines/backends/llguidance.py
def __init__(self, model: SteerableModel):
    """
    Parameters
    ----------
    model
        The Outlines model of the user.

    """
    # Deferred import: llguidance is an optional dependency.
    import llguidance as llg

    self.llg = llg
    self.tensor_library_name = model.tensor_library_name
    self.llg_tokenizer = self._create_llg_tokenizer(model)

get_cfg_logits_processor(grammar)

Create a logits processor from a context-free grammar.

Parameters:

Name Type Description Default
grammar str

The context-free grammar to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/llguidance.py
def get_cfg_logits_processor(
    self, grammar: str
) -> LLGuidanceLogitsProcessor:
    """Create a logits processor from a context-free grammar.

    Parameters
    ----------
    grammar: str
        The context-free grammar to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # Try the "grammar" format first; fall back to "lark" when
    # llguidance rejects the spec with a ValueError.
    try:
        grammar_spec = self.llg.grammar_from("grammar", grammar)
    except ValueError:
        grammar_spec = self.llg.grammar_from("lark", grammar)
    return LLGuidanceLogitsProcessor(
        grammar_spec, self.llg_tokenizer, self.tensor_library_name
    )

get_json_schema_logits_processor(json_schema)

Create a logits processor from a JSON schema.

Parameters:

Name Type Description Default
json_schema str

The JSON schema to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/llguidance.py
def get_json_schema_logits_processor(
    self, json_schema: str
) -> LLGuidanceLogitsProcessor:
    """Create a logits processor from a JSON schema.

    Parameters
    ----------
    json_schema: str
        The JSON schema to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # Compile the schema into an llguidance grammar spec.
    grammar_spec = self.llg.grammar_from("json_schema", json_schema)
    return LLGuidanceLogitsProcessor(
        grammar_spec, self.llg_tokenizer, self.tensor_library_name
    )

get_regex_logits_processor(regex)

Create a logits processor from a regex.

Parameters:

Name Type Description Default
regex str

The regex to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/llguidance.py
def get_regex_logits_processor(
    self, regex: str
) -> LLGuidanceLogitsProcessor:
    """Create a logits processor from a regex.

    Parameters
    ----------
    regex: str
        The regex to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # Compile the regex into an llguidance grammar spec.
    grammar_spec = self.llg.grammar_from("regex", regex)
    return LLGuidanceLogitsProcessor(
        grammar_spec, self.llg_tokenizer, self.tensor_library_name
    )

OutlinesCoreBackend

Bases: BaseBackend

Backend for Outlines Core.

Source code in outlines/backends/outlines_core.py
class OutlinesCoreBackend(BaseBackend):
    """Backend for Outlines Core."""

    def __init__(self, model: SteerableModel):
        """
        Parameters
        ----------
        model
            The Outlines model of the user.

        """
        # Extract the tokenizer data needed to build the outlines-core
        # vocabulary; each model type exposes it differently.
        if isinstance(model, Transformers):
            tokenizer = model.tokenizer
            vocabulary = tokenizer.get_vocab()
            eos_token_id = tokenizer.eos_token_id
            eos_token = tokenizer.eos_token
            token_to_str = tokenizer.convert_token_to_string
        elif isinstance(model, LlamaCpp):
            tokenizer = model.tokenizer # type: ignore
            vocabulary = tokenizer.vocabulary
            eos_token_id = tokenizer.eos_token_id
            eos_token = tokenizer.eos_token
            token_to_str = tokenizer.convert_token_to_string
        elif isinstance(model, MLXLM): # pragma: no cover
            tokenizer = model.mlx_tokenizer # type: ignore
            vocabulary = tokenizer.get_vocab()
            eos_token_id = tokenizer.eos_token_id
            eos_token = tokenizer.eos_token
            token_to_str = lambda token: tokenizer.convert_tokens_to_string([token]) # type: ignore
        else: # pragma: no cover
            raise ValueError(f"Unsupported model type: {type(model)}")

        self.eos_token_id = eos_token_id
        self.vocabulary = self.create_outlines_core_vocabulary(
            vocabulary, eos_token_id, eos_token, token_to_str
        )
        self.tensor_library_name = model.tensor_library_name

    def get_json_schema_logits_processor(
        self, json_schema: str
    ):
        """Create a logits processor from a JSON schema.

        Parameters
        ----------
        json_schema: str
            The JSON schema to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        # Outlines Core handles JSON schemas by first converting them
        # into an equivalent regex.
        regex = build_regex_from_schema(json_schema)
        return self.get_regex_logits_processor(regex)

    def get_regex_logits_processor(self, regex: str):
        """Create a logits processor from a regex.

        Parameters
        ----------
        regex: str
            The regex to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        index = Index(regex, self.vocabulary)
        return OutlinesCoreLogitsProcessor(index, self.tensor_library_name)

    def get_cfg_logits_processor(self, grammar):
        """Raise: Outlines Core has no CFG support."""
        raise NotImplementedError(
            "Outlines Core does not support context-free grammar."
        )

    @staticmethod
    def create_outlines_core_vocabulary(
        vocab: Dict[str, int],
        eos_token_id: int,
        eos_token: str,
        token_to_str: Callable[[str], str]
    ) -> Vocabulary:
        """Create an Outlines Core Vocabulary instance.

        Parameters
        ----------
        vocab: Dict[str, int]
            The vocabulary to create an Outlines Core vocabulary from.
        eos_token_id: int
            The EOS token ID.
        eos_token: str
            The EOS token.
        token_to_str: Callable[[str], str]
            The function to convert a token to a string.

        Returns
        -------
        Vocabulary
            The Outlines Core Vocabulary instance.

        """
        formatted_vocab: Dict[str, list] = {}
        for token, token_id in vocab.items():
            # This step is necessary to transform special tokens into their
            # string representation, in particular for spacing. We need those
            # string representations as outlines core first builds an FSM from
            # the regex provided that only contains regular strings.
            token_as_str = token_to_str(token)
            # Distinct token ids can decode to the same string; collect
            # all of them instead of keeping only the last one seen.
            formatted_vocab.setdefault(token_as_str, []).append(token_id)
        # Drop the EOS token (handled separately via eos_token_id); use a
        # default so a missing key does not raise KeyError.
        formatted_vocab.pop(eos_token, None)
        return Vocabulary(eos_token_id, formatted_vocab)

__init__(model)

Parameters:

Name Type Description Default
model SteerableModel

The Outlines model of the user.

required
Source code in outlines/backends/outlines_core.py
def __init__(self, model: SteerableModel):
    """
    Parameters
    ----------
    model
        The Outlines model of the user.

    """
    # Extract the tokenizer data needed to build the outlines-core
    # vocabulary; each model type exposes it differently.
    if isinstance(model, Transformers):
        tokenizer = model.tokenizer
        vocabulary = tokenizer.get_vocab()
        eos_token_id = tokenizer.eos_token_id
        eos_token = tokenizer.eos_token
        token_to_str = tokenizer.convert_token_to_string
    elif isinstance(model, LlamaCpp):
        tokenizer = model.tokenizer # type: ignore
        vocabulary = tokenizer.vocabulary
        eos_token_id = tokenizer.eos_token_id
        eos_token = tokenizer.eos_token
        token_to_str = tokenizer.convert_token_to_string
    elif isinstance(model, MLXLM): # pragma: no cover
        tokenizer = model.mlx_tokenizer # type: ignore
        vocabulary = tokenizer.get_vocab()
        eos_token_id = tokenizer.eos_token_id
        eos_token = tokenizer.eos_token
        token_to_str = lambda token: tokenizer.convert_tokens_to_string([token]) # type: ignore
    else: # pragma: no cover
        raise ValueError(f"Unsupported model type: {type(model)}")

    self.eos_token_id = eos_token_id
    self.vocabulary = self.create_outlines_core_vocabulary(
        vocabulary, eos_token_id, eos_token, token_to_str
    )
    self.tensor_library_name = model.tensor_library_name

create_outlines_core_vocabulary(vocab, eos_token_id, eos_token, token_to_str) staticmethod

Create an Outlines Core Vocabulary instance.

Parameters:

Name Type Description Default
vocab Dict[str, int]

The vocabulary to create an Outlines Core vocabulary from.

required
eos_token_id int

The EOS token ID.

required
eos_token str

The EOS token.

required
token_to_str Callable[[str], str]

The function to convert a token to a string.

required

Returns:

Type Description
Vocabulary

The Outlines Core Vocabulary instance.

Source code in outlines/backends/outlines_core.py
@staticmethod
def create_outlines_core_vocabulary(
    vocab: Dict[str, int],
    eos_token_id: int,
    eos_token: str,
    token_to_str: Callable[[str], str]
) -> Vocabulary:
    """Create an Outlines Core Vocabulary instance.

    Parameters
    ----------
    vocab: Dict[str, int]
        The vocabulary to create an Outlines Core vocabulary from.
    eos_token_id: int
        The EOS token ID.
    eos_token: str
        The EOS token.
    token_to_str: Callable[[str], str]
        The function to convert a token to a string.

    Returns
    -------
    Vocabulary
        The Outlines Core Vocabulary instance.

    """
    formatted_vocab: Dict[str, list] = {}
    for token, token_id in vocab.items():
        # This step is necessary to transform special tokens into their
        # string representation, in particular for spacing. We need those
        # string representations as outlines core first builds an FSM from
        # the regex provided that only contains regular strings.
        token_as_str = token_to_str(token)
        # Distinct token ids can decode to the same string; collect all
        # of them instead of keeping only the last one seen.
        formatted_vocab.setdefault(token_as_str, []).append(token_id)
    # Drop the EOS token (handled separately via eos_token_id); use a
    # default so a missing key does not raise KeyError.
    formatted_vocab.pop(eos_token, None)
    return Vocabulary(eos_token_id, formatted_vocab)

get_json_schema_logits_processor(json_schema)

Create a logits processor from a JSON schema.

Parameters:

Name Type Description Default
json_schema str

The JSON schema to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/outlines_core.py
def get_json_schema_logits_processor(
    self, json_schema: str
):
    """Create a logits processor from a JSON schema.

    Parameters
    ----------
    json_schema: str
        The JSON schema to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # Outlines Core handles JSON schemas by first converting them into
    # an equivalent regex.
    regex = build_regex_from_schema(json_schema)
    return self.get_regex_logits_processor(regex)

get_regex_logits_processor(regex)

Create a logits processor from a regex.

Parameters:

Name Type Description Default
regex str

The regex to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/outlines_core.py
def get_regex_logits_processor(self, regex: str):
    """Create a logits processor from a regex.

    Parameters
    ----------
    regex: str
        The regex to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # Build an outlines-core Index over the backend's vocabulary.
    index = Index(regex, self.vocabulary)
    return OutlinesCoreLogitsProcessor(index, self.tensor_library_name)

XGrammarBackend

Bases: BaseBackend

Backend for XGrammar.

Source code in outlines/backends/xgrammar.py
class XGrammarBackend(BaseBackend):
    """Backend for XGrammar."""

    def __init__(self, model: SteerableModel):
        """
        Parameters
        ----------
        model
            The Outlines model of the user.

        """
        # Deferred import: xgrammar is an optional dependency.
        import xgrammar as xgr

        if isinstance(model, Transformers):
            hf_tokenizer = model.hf_tokenizer
        elif isinstance(model, MLXLM):  # pragma: no cover
            hf_tokenizer = model.mlx_tokenizer._tokenizer
        else:  # pragma: no cover
            raise ValueError(
                "The xgrammar backend only supports Transformers and "
                "MLXLM models"
            )

        tokenizer_info = xgr.TokenizerInfo.from_huggingface(
            hf_tokenizer, vocab_size=len(hf_tokenizer.get_vocab())
        )
        self.grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
        self.tensor_library_name = model.tensor_library_name

    def get_json_schema_logits_processor(
        self, json_schema: str
    ) -> XGrammarLogitsProcessor:
        """Create a logits processor from a JSON schema.

        Parameters
        ----------
        json_schema: str
            The JSON schema to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        compiled = self.grammar_compiler.compile_json_schema(json_schema)
        return XGrammarLogitsProcessor(compiled, self.tensor_library_name)

    def get_regex_logits_processor(
        self, regex: str
    ) -> XGrammarLogitsProcessor:
        """Create a logits processor from a regex.

        Parameters
        ----------
        regex: str
            The regex to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        compiled = self.grammar_compiler.compile_regex(regex)
        return XGrammarLogitsProcessor(compiled, self.tensor_library_name)

    def get_cfg_logits_processor(
        self, grammar: str
    ) -> XGrammarLogitsProcessor:
        """Create a logits processor from a context-free grammar.

        Parameters
        ----------
        grammar: str
            The context-free grammar to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        compiled = self.grammar_compiler.compile_grammar(grammar)
        return XGrammarLogitsProcessor(compiled, self.tensor_library_name)

__init__(model)

Parameters:

Name Type Description Default
model SteerableModel

The Outlines model of the user.

required
Source code in outlines/backends/xgrammar.py
def __init__(self, model: SteerableModel):
    """
    Parameters
    ----------
    model
        The Outlines model of the user.

    """
    # Deferred import: xgrammar is an optional dependency.
    import xgrammar as xgr

    if isinstance(model, Transformers):
        tokenizer = model.hf_tokenizer
    elif isinstance(model, MLXLM): # pragma: no cover
        tokenizer = model.mlx_tokenizer._tokenizer
    else: # pragma: no cover
        raise ValueError(
            "The xgrammar backend only supports Transformers and "
            + "MLXLM models"
        )

    # A single compiler instance is reused for all grammar kinds.
    tokenizer_info = xgr.TokenizerInfo.from_huggingface(
        tokenizer,
        vocab_size=len(tokenizer.get_vocab())
    )
    self.grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
    self.tensor_library_name = model.tensor_library_name

get_cfg_logits_processor(grammar)

Create a logits processor from a context-free grammar.

Parameters:

Name Type Description Default
grammar str

The context-free grammar to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/xgrammar.py
def get_cfg_logits_processor(
    self, grammar: str
) -> XGrammarLogitsProcessor:
    """Create a logits processor from a context-free grammar.

    Parameters
    ----------
    grammar: str
        The context-free grammar to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # Compile the grammar once; the processor reuses the compiled form.
    compiled_grammar = self.grammar_compiler.compile_grammar(grammar)
    return XGrammarLogitsProcessor(
        compiled_grammar,
        self.tensor_library_name
    )

get_json_schema_logits_processor(json_schema)

Create a logits processor from a JSON schema.

Parameters:

Name Type Description Default
json_schema str

The JSON schema to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/xgrammar.py
def get_json_schema_logits_processor(
    self, json_schema: str
) -> XGrammarLogitsProcessor:
    """Create a logits processor from a JSON schema.

    Parameters
    ----------
    json_schema: str
        The JSON schema to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # Compile the schema once; the processor reuses the compiled form.
    compiled_grammar = self.grammar_compiler.compile_json_schema(
        json_schema
    )
    return XGrammarLogitsProcessor(
        compiled_grammar,
        self.tensor_library_name
    )

get_regex_logits_processor(regex)

Create a logits processor from a regex.

Parameters:

Name Type Description Default
regex str

The regex to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/xgrammar.py
def get_regex_logits_processor(
    self, regex: str
) -> XGrammarLogitsProcessor:
    """Create a logits processor from a regex.

    Parameters
    ----------
    regex: str
        The regex to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # Compile the regex once; the processor reuses the compiled form.
    compiled_grammar = self.grammar_compiler.compile_regex(regex)
    return XGrammarLogitsProcessor(
        compiled_grammar,
        self.tensor_library_name
    )

get_cfg_logits_processor(backend_name, model, grammar)

Create a logits processor from a context-free grammar.

Parameters:

Name Type Description Default
backend_name str | None

The name of the backend to use.

required
model SteerableModel

The Outlines model of the user.

required
grammar str

The context-free grammar to create a logits processor from.

required

Returns:

Type Description
LogitsProcessorType

The logits processor.

Source code in outlines/backends/__init__.py
def get_cfg_logits_processor(
    backend_name: str | None,
    model: SteerableModel,
    grammar: str,
) -> LogitsProcessorType:
    """Create a logits processor from a context-free grammar.

    Parameters
    ----------
    backend_name: str | None
        The name of the backend to use.
    model: Model
        The Outlines model of the user.
    grammar: str
        The context-free grammar to create a logits processor from.

    Returns
    -------
    LogitsProcessorType
        The logits processor.

    """
    # Fall back to the default CFG backend when no name was supplied.
    selected = backend_name or CFG_DEFAULT_BACKEND
    return _get_backend(selected, model).get_cfg_logits_processor(grammar)

get_json_schema_logits_processor(backend_name, model, json_schema)

Create a logits processor from a JSON schema.

Parameters:

Name Type Description Default
backend_name str | None

The name of the backend to use.

required
model SteerableModel

The Outlines model of the user.

required
json_schema str

The JSON schema to create a logits processor from.

required

Returns:

Type Description
LogitsProcessorType

The logits processor.

Source code in outlines/backends/__init__.py
def get_json_schema_logits_processor(
    backend_name: str | None,
    model: SteerableModel,
    json_schema: str,
) -> LogitsProcessorType:
    """Create a logits processor from a JSON schema.

    Parameters
    ----------
    backend_name: str | None
        The name of the backend to use.
    model: Model
        The Outlines model of the user.
    json_schema: str
        The JSON schema to create a logits processor from.

    Returns
    -------
    LogitsProcessorType
        The logits processor.

    """
    # Fall back to the default JSON-schema backend when no name was supplied.
    selected = backend_name or JSON_SCHEMA_DEFAULT_BACKEND
    return _get_backend(selected, model).get_json_schema_logits_processor(
        json_schema
    )

get_regex_logits_processor(backend_name, model, regex)

Create a logits processor from a regex.

Parameters:

Name Type Description Default
backend_name str | None

The name of the backend to use.

required
model SteerableModel

The Outlines model of the user.

required
regex str

The regex to create a logits processor from.

required

Returns:

Type Description
LogitsProcessorType

The logits processor.

Source code in outlines/backends/__init__.py
def get_regex_logits_processor(
    backend_name: str | None,
    model: SteerableModel,
    regex: str,
) -> LogitsProcessorType:
    """Create a logits processor from a regex.

    Parameters
    ----------
    backend_name: str | None
        The name of the backend to use.
    model: Model
        The Outlines model of the user.
    regex: str
        The regex to create a logits processor from.

    Returns
    -------
    LogitsProcessorType
        The logits processor.

    """
    # Fall back to the default regex backend when no name was supplied.
    selected = backend_name or REGEX_DEFAULT_BACKEND
    return _get_backend(selected, model).get_regex_logits_processor(regex)

base

Base class for all backends.

BaseBackend

Bases: ABC

Base class for all backends.

The subclasses must implement methods that create a logits processor from a JSON schema, regex or CFG.

Source code in outlines/backends/base.py
class BaseBackend(ABC):
    """Base class for all backends.

    The subclasses must implement methods that create a logits processor
    from a JSON schema, regex or CFG. Known implementations in this
    package: LLGuidanceBackend, OutlinesCoreBackend, XGrammarBackend.

    """

    @abstractmethod
    def get_json_schema_logits_processor(
        self, json_schema: str
    ) -> LogitsProcessorType:
        """Create a logits processor from a JSON schema.

        Parameters
        ----------
        json_schema: str
            The JSON schema to create a logits processor from.

        Returns
        -------
        LogitsProcessorType
            The logits processor.

        """
        ...

    @abstractmethod
    def get_regex_logits_processor(self, regex: str) -> LogitsProcessorType:
        """Create a logits processor from a regex.

        Parameters
        ----------
        regex: str
            The regex to create a logits processor from.

        Returns
        -------
        LogitsProcessorType
            The logits processor.

        """
        ...

    @abstractmethod
    def get_cfg_logits_processor(self, grammar: str) -> LogitsProcessorType:
        """Create a logits processor from a context-free grammar.

        Parameters
        ----------
        grammar: str
            The context-free grammar to create a logits processor from.

        Returns
        -------
        LogitsProcessorType
            The logits processor.

        """
        ...
get_cfg_logits_processor(grammar) abstractmethod

Create a logits processor from a context-free grammar.

Parameters:

Name Type Description Default
grammar str

The context-free grammar to create a logits processor from.

required

Returns:

Type Description
LogitsProcessorType

The logits processor.

Source code in outlines/backends/base.py
@abstractmethod
def get_cfg_logits_processor(self, grammar: str) -> LogitsProcessorType:
    """Create a logits processor from a context-free grammar.

    Parameters
    ----------
    grammar: str
        The context-free grammar to create a logits processor from.

    Returns
    -------
    LogitsProcessorType
        The logits processor.

    """
    # Abstract: each concrete backend provides the implementation.
    ...
get_json_schema_logits_processor(json_schema) abstractmethod

Create a logits processor from a JSON schema.

Parameters:

Name Type Description Default
json_schema str

The JSON schema to create a logits processor from.

required

Returns:

Type Description
LogitsProcessorType

The logits processor.

Source code in outlines/backends/base.py
@abstractmethod
def get_json_schema_logits_processor(
    self, json_schema: str
) -> LogitsProcessorType:
    """Create a logits processor from a JSON schema.

    Parameters
    ----------
    json_schema: str
        The JSON schema to create a logits processor from.

    Returns
    -------
    LogitsProcessorType
        The logits processor.

    """
    # Abstract: each concrete backend provides the implementation.
    ...
get_regex_logits_processor(regex) abstractmethod

Create a logits processor from a regex.

Parameters:

Name Type Description Default
regex str

The regex to create a logits processor from.

required

Returns:

Type Description
LogitsProcessorType

The logits processor.

Source code in outlines/backends/base.py
@abstractmethod
def get_regex_logits_processor(self, regex: str) -> LogitsProcessorType:
    """Create a logits processor from a regex.

    Parameters
    ----------
    regex: str
        The regex to create a logits processor from.

    Returns
    -------
    LogitsProcessorType
        The logits processor.

    """
    # Abstract: each concrete backend provides the implementation.
    ...

llguidance

Backend class for LLGuidance.

LLGuidanceBackend

Bases: BaseBackend

Backend for LLGuidance.

Source code in outlines/backends/llguidance.py
class LLGuidanceBackend(BaseBackend):
    """Backend for LLGuidance."""

    def __init__(self, model: SteerableModel):
        """
        Parameters
        ----------
        model
            The Outlines model whose tokenizer and tensor library this
            backend should use.

        """
        # Imported lazily so llguidance is only required when this
        # backend is actually instantiated.
        import llguidance as llg

        self.llg = llg
        self.tensor_library_name = model.tensor_library_name
        self.llg_tokenizer = self._create_llg_tokenizer(model)

    def _create_llg_tokenizer(self, model: SteerableModel) -> "LLGTokenizer":
        """Build an llg tokenizer from the Outlines model's tokenizer.

        Parameters
        ----------
        model: Model
            The Outlines model.

        Returns
        -------
        LLGTokenizer
            The llg tokenizer.

        """
        if isinstance(model, Transformers):
            import llguidance.hf

            return llguidance.hf.from_tokenizer(model.hf_tokenizer)

        if isinstance(model, LlamaCpp):
            import llama_cpp
            import llguidance.llamacpp

            llama_vocab = llama_cpp.llama_model_get_vocab(model.model.model)
            return llguidance.llamacpp.lltokenizer_from_vocab(llama_vocab)

        if isinstance(model, MLXLM): # pragma: no cover
            import llguidance.hf

            return llguidance.hf.from_tokenizer(
                model.mlx_tokenizer._tokenizer
            )

        raise ValueError( # pragma: no cover
            f"Unsupported model type: {type(model)}. "
            "Llguidance only supports LlamaCpp, MLXLM "
            "and Transformers models."
        )

    def _to_processor(self, grammar_spec) -> LLGuidanceLogitsProcessor:
        """Wrap an llguidance grammar spec in a logits processor."""
        return LLGuidanceLogitsProcessor(
            grammar_spec, self.llg_tokenizer, self.tensor_library_name
        )

    def get_json_schema_logits_processor(
        self, json_schema: str
    ) -> LLGuidanceLogitsProcessor:
        """Create a logits processor from a JSON schema.

        Parameters
        ----------
        json_schema: str
            The JSON schema to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        return self._to_processor(
            self.llg.grammar_from("json_schema", json_schema)
        )

    def get_regex_logits_processor(
        self, regex: str
    ) -> LLGuidanceLogitsProcessor:
        """Create a logits processor from a regex.

        Parameters
        ----------
        regex: str
            The regex to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        return self._to_processor(self.llg.grammar_from("regex", regex))

    def get_cfg_logits_processor(
        self, grammar: str
    ) -> LLGuidanceLogitsProcessor:
        """Create a logits processor from a context-free grammar.

        Parameters
        ----------
        grammar: str
            The context-free grammar to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        # llguidance accepts several grammar syntaxes; try the generic
        # "grammar" format first and fall back to Lark syntax.
        try:
            grammar_spec = self.llg.grammar_from("grammar", grammar)
        except ValueError:
            grammar_spec = self.llg.grammar_from("lark", grammar)
        return self._to_processor(grammar_spec)
__init__(model)

Parameters:

Name Type Description Default
model SteerableModel

The Outlines model of the user.

required
Source code in outlines/backends/llguidance.py
def __init__(self, model: SteerableModel):
    """
    Parameters
    ----------
    model
        The Outlines model of the user.

    """
    # Imported lazily so llguidance is only required when this backend
    # is actually instantiated.
    import llguidance as llg

    self.llg = llg
    self.tensor_library_name = model.tensor_library_name
    # Convert the model's tokenizer into an llguidance tokenizer; only
    # Transformers, LlamaCpp and MLXLM models are supported.
    self.llg_tokenizer = self._create_llg_tokenizer(model)
get_cfg_logits_processor(grammar)

Create a logits processor from a context-free grammar.

Parameters:

Name Type Description Default
grammar str

The context-free grammar to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/llguidance.py
def get_cfg_logits_processor(
    self, grammar: str
) -> LLGuidanceLogitsProcessor:
    """Create a logits processor from a context-free grammar.

    Parameters
    ----------
    grammar: str
        The context-free grammar to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # llguidance accepts several grammar syntaxes; try the generic
    # "grammar" format first and fall back to Lark syntax if it is
    # rejected.
    try:
        grammar_spec = self.llg.grammar_from("grammar", grammar)
    except ValueError:
        grammar_spec = self.llg.grammar_from("lark", grammar)
    return LLGuidanceLogitsProcessor(
        grammar_spec, self.llg_tokenizer, self.tensor_library_name
    )
get_json_schema_logits_processor(json_schema)

Create a logits processor from a JSON schema.

Parameters:

Name Type Description Default
json_schema str

The JSON schema to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/llguidance.py
def get_json_schema_logits_processor(
    self, json_schema: str
) -> LLGuidanceLogitsProcessor:
    """Build a logits processor constraining output to a JSON schema.

    Parameters
    ----------
    json_schema: str
        The JSON schema the generated text must conform to.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    spec = self.llg.grammar_from("json_schema", json_schema)
    return LLGuidanceLogitsProcessor(
        spec, self.llg_tokenizer, self.tensor_library_name
    )
get_regex_logits_processor(regex)

Create a logits processor from a regex.

Parameters:

Name Type Description Default
regex str

The regex to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/llguidance.py
def get_regex_logits_processor(
    self, regex: str
) -> LLGuidanceLogitsProcessor:
    """Build a logits processor constraining output to a regex.

    Parameters
    ----------
    regex: str
        The regular expression the generated text must match.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    spec = self.llg.grammar_from("regex", regex)
    return LLGuidanceLogitsProcessor(
        spec, self.llg_tokenizer, self.tensor_library_name
    )

LLGuidanceLogitsProcessor

Bases: OutlinesLogitsProcessor

Logits Processor for the LLGuidance backend.

Source code in outlines/backends/llguidance.py
class LLGuidanceLogitsProcessor(OutlinesLogitsProcessor):
    """Logits Processor for the LLGuidance backend."""

    def __init__(
        self,
        grammar: str,
        llg_tokenizer,
        tensor_library_name: str,
    ) -> None:
        """
        Parameters
        ----------
        grammar: str
            The grammar spec to use to create the LLMatcher
        llg_tokenizer: LLTokenizer
            The LLGuidance tokenizer
        tensor_library_name: str
            The name of the tensor library used by the model

        """
        # Matchers and bitmask are created lazily in `_setup` because they
        # depend on the batch size, only known at the first generation step.
        self.is_first_token = True
        self.grammar = grammar
        self.llg_tokenizer = llg_tokenizer
        self.tensor_library_name = tensor_library_name
        super().__init__(tensor_library_name)

    def reset(self):
        """Ensure self._setup is called again for the next generation."""
        self.is_first_token = True

    def _setup(self, batch_size: int) -> None:
        """Setup the LLMatchers, the bitmask and some functions used in the
        `process_logits` method.

        This method is called when the first token is generated instead of
        at initialization because we need to know the batch size.

        Parameters
        ----------
        batch_size: int
            The batch size of the input

        """
        from llguidance import LLMatcher

        # One matcher per sequence in the batch; each tracks its own
        # position in the grammar.
        self.ll_matchers = [
            LLMatcher(self.llg_tokenizer, self.grammar)
            for _ in range(batch_size)
        ]

        # we must adapt the bitmask creation and the bias function to the
        # tensor library used by the model
        if self.tensor_library_name == "torch":
            import llguidance.torch

            self.bitmask = llguidance.torch.allocate_token_bitmask(batch_size, self.llg_tokenizer.vocab_size)
            self._bias_logits = self._bias_logits_torch
        elif self.tensor_library_name == "numpy":
            import llguidance.numpy

            self.bitmask = llguidance.numpy.allocate_token_bitmask(batch_size, self.llg_tokenizer.vocab_size)
            self._bias_logits = self._bias_logits_numpy
        elif self.tensor_library_name == "mlx": # pragma: no cover
            import llguidance.numpy

            # MLX reuses the numpy bitmask; only the apply step differs.
            self.bitmask = llguidance.numpy.allocate_token_bitmask(batch_size, self.llg_tokenizer.vocab_size)
            self._bias_logits = self._bias_logits_mlx
        else: # pragma: no cover
            raise ValueError(f"Unsupported tensor library: {self.tensor_library_name}")

    def _bias_logits_mlx( # pragma: no cover
        self, input_ids: TensorType, logits: TensorType
    ) -> TensorType:
        """Bias the logits for the MLX backend.

        Parameters
        ----------
        input_ids
            The ids of the tokens of the existing sequences.
        logits
            The logits for the current generation step.

        Returns
        -------
        TensorType
            The biased logits.

        """
        import llguidance.mlx
        import llguidance.numpy

        biased_logits_array = []
        for i in range(self.tensor_adapter.shape(input_ids)[0]):
            llguidance.numpy.fill_next_token_bitmask(self.ll_matchers[i], self.bitmask, i)
            # MLX's apply is not in-place: collect the biased rows and
            # concatenate them afterwards.
            biased_logits = llguidance.mlx.apply_token_bitmask(
                logits[i], self.bitmask[i] # type: ignore
            )
            biased_logits_array.append(biased_logits)

        return self.tensor_adapter.concatenate(biased_logits_array)

    def _bias_logits_torch(
        self, input_ids: TensorType, logits: TensorType
    ) -> TensorType:
        """Bias the logits for the Torch backend.

        Parameters
        ----------
        input_ids
            The ids of the tokens of the existing sequences.
        logits
            The logits for the current generation step.

        Returns
        -------
        TensorType
            The biased logits.

        """
        import llguidance.torch

        batch_size = self.tensor_adapter.shape(input_ids)[0]

        # Fill every row of the bitmask on CPU first, then do a single
        # round-trip to the logits device instead of one transfer per
        # sequence (llguidance fills the bitmask on CPU only).
        for i in range(batch_size):
            llguidance.torch.fill_next_token_bitmask(self.ll_matchers[i], self.bitmask, i)

        self.bitmask = self.tensor_adapter.to_device(
            self.bitmask,
            self.tensor_adapter.get_device(logits)
        )
        for i in range(batch_size):
            llguidance.torch.apply_token_bitmask_inplace(
                logits[i], # type: ignore
                self.bitmask[i]
            )
        # Move the bitmask back to CPU so the next step can fill it again.
        self.bitmask = self.tensor_adapter.to_device(
            self.bitmask,
            "cpu"
        )

        return logits

    def _bias_logits_numpy(
        self, input_ids: TensorType, logits: TensorType
    ) -> TensorType:
        """Bias the logits for the Numpy backend.

        Parameters
        ----------
        input_ids
            The ids of the tokens of the existing sequences.
        logits
            The logits for the current generation step.

        Returns
        -------
        TensorType
            The biased logits.

        """
        import llguidance.numpy

        for i in range(self.tensor_adapter.shape(input_ids)[0]):
            llguidance.numpy.fill_next_token_bitmask(self.ll_matchers[i], self.bitmask, i)
            llguidance.numpy.apply_token_bitmask_inplace(
                logits[i], self.bitmask[i] # type: ignore
            )

        return logits

    def process_logits(
        self, input_ids: TensorType, logits: TensorType
    ) -> TensorType:
        """Use the instances of LLMatcher to bias the logits.

        Parameters
        ----------
        input_ids
            The ids of the tokens of the existing sequences.
        logits
            The logits for the current generation step.

        Returns
        -------
        TensorType
            The biased logits.

        """
        if self.is_first_token:
            self._setup(self.tensor_adapter.shape(input_ids)[0])
            self.is_first_token = False

        # we do not make the matchers consume the last token during the first
        # generation step because no tokens have been generated yet
        else:
            for i in range(self.tensor_adapter.shape(input_ids)[0]):
                sequence = input_ids[i] # type: ignore
                last_token = sequence[-1].item()
                self.ll_matchers[i].consume_token(last_token)
                error = self.ll_matchers[i].get_error()
                if error:
                    # A matcher error means the sampled token was not
                    # accepted by the grammar; warn instead of raising so
                    # generation can continue.
                    warnings.warn(f"Error in LLMatcher: {error}")

        return self._bias_logits(input_ids, logits)
__init__(grammar, llg_tokenizer, tensor_library_name)

Parameters:

Name Type Description Default
grammar str

The grammar spec to use to create the LLMatcher

required
llg_tokenizer

The LLGuidance tokenizer

required
tensor_library_name str

The name of the tensor library used by the model

required
Source code in outlines/backends/llguidance.py
def __init__(
    self,
    grammar: str,
    llg_tokenizer,
    tensor_library_name: str,
) -> None:
    """
    Parameters
    ----------
    grammar: str
        The grammar spec to use to create the LLMatcher
    llg_tokenizer: LLTokenizer
        The LLGuidance tokenizer
    tensor_library_name: str
        The name of the tensor library used by the model

    """
    # Matchers and bitmask are created lazily on the first call to
    # `process_logits` because they depend on the batch size.
    self.is_first_token = True
    self.grammar = grammar
    self.llg_tokenizer = llg_tokenizer
    self.tensor_library_name = tensor_library_name
    super().__init__(tensor_library_name)
process_logits(input_ids, logits)

Use the instances of LLMatcher to bias the logits.

Parameters:

Name Type Description Default
input_ids TensorType

The ids of the tokens of the existing sequences.

required
logits TensorType

The logits for the current generation step.

required

Returns:

Type Description
TensorType

The biased logits.

Source code in outlines/backends/llguidance.py
def process_logits(
    self, input_ids: TensorType, logits: TensorType
) -> TensorType:
    """Use the instances of LLMatcher to bias the logits.

    Parameters
    ----------
    input_ids
        The ids of the tokens of the existing sequences.
    logits
        The logits for the current generation step.

    Returns
    -------
    TensorType
        The biased logits.

    """
    if self.is_first_token:
        # Lazily create the matchers and bitmask now that the batch
        # size is known.
        self._setup(self.tensor_adapter.shape(input_ids)[0])
        self.is_first_token = False

    # we do not make the matchers consume the last token during the first
    # generation step because no tokens have been generated yet
    else:
        for i in range(self.tensor_adapter.shape(input_ids)[0]):
            sequence = input_ids[i] # type: ignore
            last_token = sequence[-1].item()
            # Advance each matcher past the token sampled at the
            # previous step before computing the next token mask.
            self.ll_matchers[i].consume_token(last_token)
            error = self.ll_matchers[i].get_error()
            if error:
                # Warn instead of raising so generation can continue.
                warnings.warn(f"Error in LLMatcher: {error}")

    return self._bias_logits(input_ids, logits)
reset()

Ensure self._setup is called again for the next generation.

Source code in outlines/backends/llguidance.py
def reset(self):
    """Ensure self._setup is called again for the next generation."""
    # `process_logits` re-runs `_setup` whenever this flag is True.
    self.is_first_token = True

outlines_core

Backend class for Outlines Core.

OutlinesCoreBackend

Bases: BaseBackend

Backend for Outlines Core.

Source code in outlines/backends/outlines_core.py
class OutlinesCoreBackend(BaseBackend):
    """Backend for Outlines Core."""

    def __init__(self, model: SteerableModel):
        """
        Parameters
        ----------
        model
            The Outlines model of the user.

        """
        # Extract the vocabulary and EOS information from the model's
        # tokenizer; the attribute names differ per model family.
        if isinstance(model, Transformers):
            tokenizer = model.tokenizer
            vocabulary = tokenizer.get_vocab()
            eos_token_id = tokenizer.eos_token_id
            eos_token = tokenizer.eos_token
            token_to_str = tokenizer.convert_token_to_string
        elif isinstance(model, LlamaCpp):
            tokenizer = model.tokenizer # type: ignore
            vocabulary = tokenizer.vocabulary
            eos_token_id = tokenizer.eos_token_id
            eos_token = tokenizer.eos_token
            token_to_str = tokenizer.convert_token_to_string
        elif isinstance(model, MLXLM): # pragma: no cover
            tokenizer = model.mlx_tokenizer # type: ignore
            vocabulary = tokenizer.get_vocab()
            eos_token_id = tokenizer.eos_token_id
            eos_token = tokenizer.eos_token
            token_to_str = lambda token: tokenizer.convert_tokens_to_string([token]) # type: ignore
        else: # pragma: no cover
            raise ValueError(f"Unsupported model type: {type(model)}")

        self.eos_token_id = eos_token_id
        self.vocabulary = self.create_outlines_core_vocabulary(
            vocabulary, eos_token_id, eos_token, token_to_str
        )
        self.tensor_library_name = model.tensor_library_name

    def get_json_schema_logits_processor(
        self, json_schema: str
    ):
        """Create a logits processor from a JSON schema.

        Parameters
        ----------
        json_schema: str
            The JSON schema to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        # Outlines Core handles JSON schemas by first turning them into
        # an equivalent regular expression.
        regex = build_regex_from_schema(json_schema)
        return self.get_regex_logits_processor(regex)

    def get_regex_logits_processor(self, regex: str):
        """Create a logits processor from a regex.

        Parameters
        ----------
        regex: str
            The regex to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        index = Index(regex, self.vocabulary)
        return OutlinesCoreLogitsProcessor(index, self.tensor_library_name)

    def get_cfg_logits_processor(self, grammar):
        """Raise: Outlines Core cannot constrain generation with a CFG."""
        raise NotImplementedError(
            "Outlines Core does not support context-free grammar."
        )

    @staticmethod
    def create_outlines_core_vocabulary(
        vocab: Dict[str, int],
        eos_token_id: int,
        eos_token: str,
        token_to_str: Callable[[str], str]
    ) -> Vocabulary:
        """Create an Outlines Core Vocabulary instance.

        Parameters
        ----------
        vocab: Dict[str, int]
            The vocabulary to create an Outlines Core vocabulary from.
        eos_token_id: int
            The EOS token ID.
        eos_token: str
            The EOS token.
        token_to_str: Callable[[str], str]
            The function to convert a token to a string.

        Returns
        -------
        Vocabulary
            The Outlines Core Vocabulary instance.

        """
        formatted_vocab: Dict[str, list] = {}
        for token, token_id in vocab.items():
            # This step is necessary to transform special tokens into their
            # string representation, in particular for spacing. We need those
            # string representations as outlines core first builds an FSM from
            # the regex provided that only contains regular strings.
            token_as_str = token_to_str(token)
            # Several distinct token ids can decode to the same string
            # (common with byte-level tokenizers); collect all of them
            # instead of letting the last id overwrite the others.
            formatted_vocab.setdefault(token_as_str, []).append(token_id)
        # The EOS token is passed separately to the Vocabulary, so remove
        # it from the regular entries.
        formatted_vocab.pop(eos_token)
        return Vocabulary(eos_token_id, formatted_vocab)
__init__(model)

Parameters:

Name Type Description Default
model SteerableModel

The Outlines model of the user.

required
Source code in outlines/backends/outlines_core.py
def __init__(self, model: SteerableModel):
    """
    Parameters
    ----------
    model
        The Outlines model of the user.

    """
    # Pick the tokenizer, raw vocabulary and token-to-string converter
    # depending on the model family; attribute names differ per family.
    if isinstance(model, Transformers):
        tokenizer = model.tokenizer
        vocabulary = tokenizer.get_vocab()
        token_to_str = tokenizer.convert_token_to_string
    elif isinstance(model, LlamaCpp):
        tokenizer = model.tokenizer # type: ignore
        vocabulary = tokenizer.vocabulary
        token_to_str = tokenizer.convert_token_to_string
    elif isinstance(model, MLXLM): # pragma: no cover
        tokenizer = model.mlx_tokenizer # type: ignore
        vocabulary = tokenizer.get_vocab()
        token_to_str = lambda token: tokenizer.convert_tokens_to_string([token]) # type: ignore
    else: # pragma: no cover
        raise ValueError(f"Unsupported model type: {type(model)}")

    # All supported tokenizers expose the EOS token the same way.
    eos_token_id = tokenizer.eos_token_id
    eos_token = tokenizer.eos_token

    self.eos_token_id = eos_token_id
    self.vocabulary = self.create_outlines_core_vocabulary(
        vocabulary, eos_token_id, eos_token, token_to_str
    )
    self.tensor_library_name = model.tensor_library_name
create_outlines_core_vocabulary(vocab, eos_token_id, eos_token, token_to_str) staticmethod

Create an Outlines Core Vocabulary instance.

Parameters:

Name Type Description Default
vocab Dict[str, int]

The vocabulary to create an Outlines Core vocabulary from.

required
eos_token_id int

The EOS token ID.

required
eos_token str

The EOS token.

required
token_to_str Callable[[str], str]

The function to convert a token to a string.

required

Returns:

Type Description
Vocabulary

The Outlines Core Vocabulary instance.

Source code in outlines/backends/outlines_core.py
@staticmethod
def create_outlines_core_vocabulary(
    vocab: Dict[str, int],
    eos_token_id: int,
    eos_token: str,
    token_to_str: Callable[[str], str]
) -> Vocabulary:
    """Create an Outlines Core Vocabulary instance.

    Parameters
    ----------
    vocab: Dict[str, int]
        The vocabulary to create an Outlines Core vocabulary from.
    eos_token_id: int
        The EOS token ID.
    eos_token: str
        The EOS token.
    token_to_str: Callable[[str], str]
        The function to convert a token to a string.

    Returns
    -------
    Vocabulary
        The Outlines Core Vocabulary instance.

    """
    formatted_vocab: Dict[str, list] = {}
    for token, token_id in vocab.items():
        # This step is necessary to transform special tokens into their
        # string representation, in particular for spacing. We need those
        # string representations as outlines core first builds an FSM from
        # the regex provided that only contains regular strings.
        token_as_str = token_to_str(token)
        # Several distinct token ids can decode to the same string
        # (common with byte-level tokenizers); collect all of them
        # instead of letting the last id overwrite the others.
        formatted_vocab.setdefault(token_as_str, []).append(token_id)
    # The EOS token is passed separately to the Vocabulary, so remove it
    # from the regular entries.
    formatted_vocab.pop(eos_token)
    return Vocabulary(eos_token_id, formatted_vocab)
get_json_schema_logits_processor(json_schema)

Create a logits processor from a JSON schema.

Parameters:

Name Type Description Default
json_schema str

The JSON schema to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/outlines_core.py
def get_json_schema_logits_processor(
    self, json_schema: str
):
    """Build a logits processor constraining output to a JSON schema.

    Parameters
    ----------
    json_schema: str
        The JSON schema the generated text must conform to.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    # Outlines Core handles JSON schemas by first turning them into an
    # equivalent regular expression.
    return self.get_regex_logits_processor(
        build_regex_from_schema(json_schema)
    )
get_regex_logits_processor(regex)

Create a logits processor from a regex.

Parameters:

Name Type Description Default
regex str

The regex to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/outlines_core.py
def get_regex_logits_processor(self, regex: str):
    """Build a logits processor constraining output to a regex.

    Parameters
    ----------
    regex: str
        The regular expression the generated text must match.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    compiled_index = Index(regex, self.vocabulary)
    return OutlinesCoreLogitsProcessor(
        compiled_index, self.tensor_library_name
    )

OutlinesCoreLogitsProcessor

Bases: OutlinesLogitsProcessor

Logits processor for Outlines Core.

Source code in outlines/backends/outlines_core.py
class OutlinesCoreLogitsProcessor(OutlinesLogitsProcessor):
    """Logits processor for Outlines Core."""

    def __init__(
        self, index: Index, tensor_library_name: str
    ):
        """
        Parameters
        ----------
        index: Index
            The Outlines Core `Index` instance to use to create the Outlines
            Core `Guide` instances that will be used to bias the logits
        tensor_library_name: str
            The tensor library name to use for the logits processor.

        """
        self.index = index
        self.tensor_library_name = tensor_library_name
        # Guides and bitmasks are created lazily in `_setup` because they
        # depend on the batch and vocabulary sizes.
        self.is_first_token = True
        super().__init__(tensor_library_name)

    def reset(self) -> None:
        """Reset the logits processor."""
        # `process_logits` re-runs `_setup` whenever this flag is True.
        self.is_first_token = True

    def _setup(self, batch_size: int, vocab_size: int) -> None:
        """Set the guides, bitmasks and some functions used in the
        `process_logits` method.

        This method is called when the first token is generated instead of
        at initialization because we need to know the batch size and the device
        of the logits.

        Parameters
        ----------
        batch_size: int
            The batch size.
        vocab_size: int
            The vocabulary size.

        """
        # Select the bitmask allocator and bias function matching the
        # model's tensor library.
        if self.tensor_library_name == "torch":
            from outlines_core.kernels.torch import allocate_token_bitmask

            self.allocate_token_bitmask = allocate_token_bitmask
            self.bias_logits = self._bias_logits_torch

        elif self.tensor_library_name == "numpy":
            from outlines_core.kernels.numpy import allocate_token_bitmask

            self.allocate_token_bitmask = allocate_token_bitmask
            self.bias_logits = self._bias_logits_numpy

        elif self.tensor_library_name == "mlx": # pragma: no cover
            from outlines_core.kernels.mlx import (
                allocate_token_bitmask
            )

            self.allocate_token_bitmask = allocate_token_bitmask
            self.bias_logits = self._bias_logits_mlx

        else: # pragma: no cover
            raise ValueError(
                f"Unsupported tensor library: {self.tensor_library_name}"
            )

        # One guide (FSM walker) and one bitmask per sequence in the batch.
        self._guides = [Guide(self.index) for _ in range(batch_size)]
        self._bitmasks = [
            self.allocate_token_bitmask(vocab_size)
            for _ in range(batch_size)
        ]

    def _bias_logits_mlx( # pragma: no cover
        self, batch_size: int, logits: TensorType
    ) -> TensorType:
        """Bias the logits for MLX tensors."""
        from outlines_core.kernels.mlx import (
            apply_token_bitmask,
            fill_next_token_bitmask
        )

        # MLX's apply is not in-place: collect the biased rows and
        # concatenate them afterwards.
        biased_logits_array = []
        for i in range(batch_size):
            fill_next_token_bitmask(self._guides[i], self._bitmasks[i])
            biased_logits = apply_token_bitmask(
                self.tensor_adapter.unsqueeze(logits[i]), self._bitmasks[i] # type: ignore
            )
            biased_logits_array.append(biased_logits)

        return self.tensor_adapter.concatenate(biased_logits_array)

    def _bias_logits_torch(
        self, batch_size: int, logits: TensorType
    ) -> TensorType:
        """Bias the logits for Torch tensors."""
        from outlines_core.kernels.torch import (
            apply_token_bitmask_inplace,
            fill_next_token_bitmask
        )

        for i in range(batch_size):
            # The bitmask is filled on CPU, moved to the logits device for
            # the in-place apply, then moved back for the next fill.
            fill_next_token_bitmask(self._guides[i], self._bitmasks[i])
            self._bitmasks[i] = self.tensor_adapter.to_device(
                self._bitmasks[i],
                self.tensor_adapter.get_device(logits)
            )
            apply_token_bitmask_inplace(
                self.tensor_adapter.unsqueeze(logits[i]), # type: ignore
                self._bitmasks[i]
            )
            self._bitmasks[i] = self.tensor_adapter.to_device(
                self._bitmasks[i],
                "cpu"
            )

        return logits

    def _bias_logits_numpy(
        self, batch_size: int, logits: TensorType
    ) -> TensorType:
        """Bias the logits for Numpy tensors."""
        from outlines_core.kernels.numpy import (
            apply_token_bitmask_inplace,
            fill_next_token_bitmask
        )

        for i in range(batch_size):
            fill_next_token_bitmask(self._guides[i], self._bitmasks[i])
            apply_token_bitmask_inplace(
                self.tensor_adapter.unsqueeze(logits[i]), # type: ignore
                self._bitmasks[i]
            )

        return logits

    def process_logits(
        self, input_ids: TensorType, logits: TensorType
    ) -> TensorType:
        """Use the guides to bias the logits.

        Parameters
        ----------
        input_ids
            The ids of the tokens of the existing sequences.
        logits
            The logits for the current generation step.

        Returns
        -------
        TensorType
            The biased logits.

        """
        batch_size = self.tensor_adapter.shape(input_ids)[0]
        vocab_size = self.tensor_adapter.shape(logits)[1]

        if self.is_first_token:
            self._setup(batch_size, vocab_size)
            self.is_first_token = False
        else:
            # Advance each guide past the token sampled at the previous
            # generation step.
            for i in range(batch_size):
                last_token_id = self.tensor_adapter.to_scalar(input_ids[i][-1]) # type: ignore
                # This circumvents issue #227 in outlines_core
                # Ideally, we would be able to advance all the times as the final
                # state would accept the eos token leading to itself
                if (
                    not self._guides[i].is_finished()
                    or self._guides[i].accepts_tokens([last_token_id])
                ):
                    self._guides[i].advance(
                        token_id=last_token_id,
                        return_tokens=False
                    )

        return self.bias_logits(batch_size, logits)
__init__(index, tensor_library_name)

Parameters:

Name Type Description Default
index Index

The Outlines Core Index instance to use to create the Outlines Core Guide instances that will be used to bias the logits

required
tensor_library_name str

The tensor library name to use for the logits processor.

required
Source code in outlines/backends/outlines_core.py
def __init__(
    self, index: Index, tensor_library_name: str
):
    """
    Parameters
    ----------
    index: Index
        The Outlines Core `Index` instance to use to create the Outlines
        Core `Guide` instances that will be used to bias the logits
    tensor_library_name: str
        The tensor library name to use for the logits processor.

    """
    self.index = index
    self.tensor_library_name = tensor_library_name
    # Guides and bitmasks are created lazily on the first call to
    # `process_logits` because they depend on batch and vocab sizes.
    self.is_first_token = True
    super().__init__(tensor_library_name)
process_logits(input_ids, logits)

Use the guides to bias the logits.

Parameters:

Name Type Description Default
input_ids TensorType

The ids of the tokens of the existing sequences.

required
logits TensorType

The logits for the current generation step.

required

Returns:

Type Description
TensorType

The biased logits.

Source code in outlines/backends/outlines_core.py
def process_logits(
    self, input_ids: TensorType, logits: TensorType
) -> TensorType:
    """Bias the logits using the per-sequence guides.

    Parameters
    ----------
    input_ids
        The ids of the tokens of the existing sequences.
    logits
        The logits for the current generation step.

    Returns
    -------
    TensorType
        The biased logits.

    """
    batch_size = self.tensor_adapter.shape(input_ids)[0]
    vocab_size = self.tensor_adapter.shape(logits)[1]

    if self.is_first_token:
        # First call of a generation: build one guide per sequence.
        self._setup(batch_size, vocab_size)
        self.is_first_token = False
        return self.bias_logits(batch_size, logits)

    for i in range(batch_size):
        latest = self.tensor_adapter.to_scalar(input_ids[i][-1]) # type: ignore
        guide = self._guides[i]
        # Workaround for issue #227 in outlines_core: ideally we would
        # always advance, as the final state would accept the eos token
        # leading to itself; instead only advance when the guide is
        # unfinished or the token is explicitly accepted.
        if not guide.is_finished() or guide.accepts_tokens([latest]):
            guide.advance(
                token_id=latest,
                return_tokens=False
            )

    return self.bias_logits(batch_size, logits)
reset()

Reset the logits processor.

Source code in outlines/backends/outlines_core.py
def reset(self) -> None:
    """Reset the logits processor for a fresh generation."""
    # Re-arm lazy setup so the guides are rebuilt on the next call.
    self.is_first_token = True

xgrammar

Backend class for XGrammar.

XGrammarBackend

Bases: BaseBackend

Backend for XGrammar.

Source code in outlines/backends/xgrammar.py
class XGrammarBackend(BaseBackend):
    """Backend for XGrammar."""

    def __init__(self, model: SteerableModel):
        """
        Parameters
        ----------
        model
            The Outlines model of the user.

        """
        import xgrammar as xgr

        # Select the underlying HuggingFace tokenizer for the supported
        # model families.
        if isinstance(model, Transformers):
            hf_tokenizer = model.hf_tokenizer
        elif isinstance(model, MLXLM): # pragma: no cover
            hf_tokenizer = model.mlx_tokenizer._tokenizer
        else: # pragma: no cover
            raise ValueError(
                "The xgrammar backend only supports Transformers and "
                + "MLXLM models"
            )

        # XGrammar needs the tokenizer metadata to map grammar states to
        # concrete token ids.
        tokenizer_info = xgr.TokenizerInfo.from_huggingface(
            hf_tokenizer, vocab_size=len(hf_tokenizer.get_vocab())
        )
        self.grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
        self.tensor_library_name = model.tensor_library_name

    def get_json_schema_logits_processor(
        self, json_schema: str
    ) -> XGrammarLogitsProcessor:
        """Create a logits processor from a JSON schema.

        Parameters
        ----------
        json_schema: str
            The JSON schema to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        return XGrammarLogitsProcessor(
            self.grammar_compiler.compile_json_schema(json_schema),
            self.tensor_library_name,
        )

    def get_regex_logits_processor(
        self, regex: str
    ) -> XGrammarLogitsProcessor:
        """Create a logits processor from a regex.

        Parameters
        ----------
        regex: str
            The regex to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        return XGrammarLogitsProcessor(
            self.grammar_compiler.compile_regex(regex),
            self.tensor_library_name,
        )

    def get_cfg_logits_processor(
        self, grammar: str
    ) -> XGrammarLogitsProcessor:
        """Create a logits processor from a context-free grammar.

        Parameters
        ----------
        grammar: str
            The context-free grammar to create a logits processor from.

        Returns
        -------
        LogitsProcessor
            The logits processor to use to constrain the generation.

        """
        return XGrammarLogitsProcessor(
            self.grammar_compiler.compile_grammar(grammar),
            self.tensor_library_name,
        )
__init__(model)

Parameters:

Name Type Description Default
model SteerableModel

The Outlines model of the user.

required
Source code in outlines/backends/xgrammar.py
def __init__(self, model: SteerableModel):
    """
    Parameters
    ----------
    model
        The Outlines model of the user.

    """
    import xgrammar as xgr

    # Pick the underlying HuggingFace tokenizer for the supported model
    # families.
    if isinstance(model, Transformers):
        hf_tokenizer = model.hf_tokenizer
    elif isinstance(model, MLXLM): # pragma: no cover
        hf_tokenizer = model.mlx_tokenizer._tokenizer
    else: # pragma: no cover
        raise ValueError(
            "The xgrammar backend only supports Transformers and "
            + "MLXLM models"
        )

    tokenizer_info = xgr.TokenizerInfo.from_huggingface(
        hf_tokenizer, vocab_size=len(hf_tokenizer.get_vocab())
    )
    self.grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
    self.tensor_library_name = model.tensor_library_name
get_cfg_logits_processor(grammar)

Create a logits processor from a context-free grammar.

Parameters:

Name Type Description Default
grammar str

The context-free grammar to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/xgrammar.py
def get_cfg_logits_processor(
    self, grammar: str
) -> XGrammarLogitsProcessor:
    """Create a logits processor from a context-free grammar.

    Parameters
    ----------
    grammar: str
        The context-free grammar to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    return XGrammarLogitsProcessor(
        self.grammar_compiler.compile_grammar(grammar),
        self.tensor_library_name,
    )
get_json_schema_logits_processor(json_schema)

Create a logits processor from a JSON schema.

Parameters:

Name Type Description Default
json_schema str

The JSON schema to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/xgrammar.py
def get_json_schema_logits_processor(
    self, json_schema: str
) -> XGrammarLogitsProcessor:
    """Create a logits processor from a JSON schema.

    Parameters
    ----------
    json_schema: str
        The JSON schema to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    return XGrammarLogitsProcessor(
        self.grammar_compiler.compile_json_schema(json_schema),
        self.tensor_library_name,
    )
get_regex_logits_processor(regex)

Create a logits processor from a regex.

Parameters:

Name Type Description Default
regex str

The regex to create a logits processor from.

required

Returns:

Type Description
LogitsProcessor

The logits processor to use to constrain the generation.

Source code in outlines/backends/xgrammar.py
def get_regex_logits_processor(
    self, regex: str
) -> XGrammarLogitsProcessor:
    """Create a logits processor from a regex.

    Parameters
    ----------
    regex: str
        The regex to create a logits processor from.

    Returns
    -------
    LogitsProcessor
        The logits processor to use to constrain the generation.

    """
    return XGrammarLogitsProcessor(
        self.grammar_compiler.compile_regex(regex),
        self.tensor_library_name,
    )

XGrammarLogitsProcessor

Bases: OutlinesLogitsProcessor

Logits processor for XGrammar.

Source code in outlines/backends/xgrammar.py
class XGrammarLogitsProcessor(OutlinesLogitsProcessor):
    """Logits processor for XGrammar."""

    def __init__(self, compiled_grammar: str, tensor_library_name: str,):
        """
        Parameters
        ----------
        compiled_grammar: str
            The compiled grammar to use to create the logits processor.
        tensor_library_name: str
            The name of the tensor library used by the model

        """
        import xgrammar as xgr

        self.xgr = xgr
        self.compiled_grammar = compiled_grammar
        self.tensor_library_name = tensor_library_name
        # Matchers and the bitmask are allocated lazily on the first
        # `process_logits` call, once the batch size is known.
        self.is_first_token = True
        super().__init__(tensor_library_name)

    def reset(self):
        """Ensure self._setup is called again for the next generation."""
        self.is_first_token = True

    def _setup(self, batch_size: int, vocab_size: int) -> None:
        """Setup the logits processor for a new generation."""
        if self.tensor_library_name == "torch":
            self._bias_logits = self._bias_logits_torch
        elif self.tensor_library_name == "mlx": # pragma: no cover
            self._bias_logits = self._bias_logits_mlx
        else: # pragma: no cover
            raise ValueError(
                f"Unsupported tensor library: {self.tensor_library_name}"
            )

        # One matcher per sequence in the batch, all sharing the same
        # compiled grammar; the bitmask holds one row per sequence.
        self._matchers = [
            self.xgr.GrammarMatcher(self.compiled_grammar)
            for _ in range(batch_size)
        ]
        self._bitmask = self.xgr.allocate_token_bitmask(batch_size, vocab_size)

    def _bias_logits_torch(
        self, input_ids: TensorType, logits: TensorType
    ) -> TensorType:
        """Bias the logits for Torch tensors."""
        n_sequences = self.tensor_adapter.shape(input_ids)[0]
        for i in range(n_sequences):
            if not self._matchers[i].is_terminated():
                self._matchers[i].fill_next_token_bitmask(self._bitmask, i)

        # The bitmask must live on the logits' device while it is
        # applied, then goes back to the CPU for the matchers.
        device = self.tensor_adapter.get_device(logits)
        self._bitmask = self.tensor_adapter.to_device(self._bitmask, device)
        self.xgr.apply_token_bitmask_inplace(logits, self._bitmask)
        self._bitmask = self.tensor_adapter.to_device(self._bitmask, "cpu")

        return logits

    def _bias_logits_mlx( # pragma: no cover
        self, input_ids: TensorType, logits: TensorType
    ) -> TensorType:
        """Bias the logits for MLX tensors."""
        import mlx.core as mx
        from xgrammar.kernels.apply_token_bitmask_mlx import apply_token_bitmask_mlx

        n_sequences = self.tensor_adapter.shape(input_ids)[0]
        for i in range(n_sequences):
            if not self._matchers[i].is_terminated():
                self._matchers[i].fill_next_token_bitmask(self._bitmask, i)

        vocab_size = self.tensor_adapter.shape(logits)[1]
        return apply_token_bitmask_mlx(
            mx.array(self._bitmask.numpy()), logits, vocab_size
        )

    def process_logits(
        self, input_ids: TensorType, logits: TensorType
    ) -> TensorType:
        """Use the XGrammar matchers to bias the logits."""
        batch_size = self.tensor_adapter.shape(input_ids)[0]
        vocab_size = self.tensor_adapter.shape(logits)[1]

        if self.is_first_token:
            self._setup(batch_size, vocab_size)
            self.is_first_token = False
            return self._bias_logits(input_ids, logits)

        # Feed each live matcher the token sampled at the previous step.
        for i in range(batch_size):
            if not self._matchers[i].is_terminated(): # pragma: no cover
                latest = self.tensor_adapter.to_scalar(
                    input_ids[i][-1] # type: ignore
                )
                assert self._matchers[i].accept_token(latest)

        return self._bias_logits(input_ids, logits)
__init__(compiled_grammar, tensor_library_name)

Parameters:

Name Type Description Default
compiled_grammar str

The compiled grammar to use to create the logits processor.

required
tensor_library_name str

The name of the tensor library used by the model

required
Source code in outlines/backends/xgrammar.py
def __init__(self, compiled_grammar: str, tensor_library_name: str,):
    """
    Parameters
    ----------
    compiled_grammar: str
        The compiled grammar to use to create the logits processor.
    tensor_library_name: str
        The name of the tensor library used by the model

    """
    import xgrammar as xgr

    self.xgr = xgr
    self.compiled_grammar = compiled_grammar
    self.tensor_library_name = tensor_library_name
    # Defer matcher/bitmask allocation to the first `process_logits` call.
    self.is_first_token = True
    super().__init__(tensor_library_name)
process_logits(input_ids, logits)

Use the XGrammar matchers to bias the logits.

Source code in outlines/backends/xgrammar.py
def process_logits(
    self, input_ids: TensorType, logits: TensorType
) -> TensorType:
    """Use the XGrammar matchers to bias the logits."""
    batch_size = self.tensor_adapter.shape(input_ids)[0]
    vocab_size = self.tensor_adapter.shape(logits)[1]

    if self.is_first_token:
        self._setup(batch_size, vocab_size)
        self.is_first_token = False
        return self._bias_logits(input_ids, logits)

    # Advance each live matcher with the token sampled last step.
    for i in range(batch_size):
        if not self._matchers[i].is_terminated(): # pragma: no cover
            latest = self.tensor_adapter.to_scalar(
                input_ids[i][-1] # type: ignore
            )
            assert self._matchers[i].accept_token(latest)

    return self._bias_logits(input_ids, logits)
reset()

Ensure self._setup is called again for the next generation.

Source code in outlines/backends/xgrammar.py
def reset(self):
    """Flag the processor so `self._setup` runs again next generation."""
    # Re-arm lazy setup; matchers will be rebuilt on the next call.
    self.is_first_token = True

caching

Caching and memoization of function calls.

cache(expire=None, typed=False, ignore=())

Caching decorator for memoizing function calls.

The cache key is created from the decorated function's full name and the arguments it is called with.

This is based on diskcache's memoize.

Parameters:

Name Type Description Default
expire Optional[float]

Seconds until arguments expire.

None
typed

Cache different types separately.

False
ignore

Positional or keyword arguments to ignore.

()

Returns:

Type Description
A decorator function that can be applied to other functions.
Source code in outlines/caching.py
def cache(expire: Optional[float] = None, typed=False, ignore=()):
    """Caching decorator for memoizing function calls.

    The cache key is created from the decorated function's full name and
    the arguments it is called with.

    This is based on `diskcache`'s `memoize`.

    Parameters
    ----------
    expire
        Seconds until arguments expire.
    typed
        Cache different types separately.
    ignore
        Positional or keyword arguments to ignore.

    Returns
    -------
        A decorator function that can be applied to other functions.
    """

    def decorator(cached_function: Callable):
        memory = get_cache()

        # Prefix every key with the function's full name so different
        # functions never collide in the shared cache.
        base = (full_name(cached_function),)

        if asyncio.iscoroutinefunction(cached_function):  # pragma: no cover

            # `functools.wraps` preserves the wrapped function's metadata
            # (`__name__`, `__doc__`, annotations) — the original wrapper
            # did not, which broke introspection of decorated functions.
            @functools.wraps(cached_function)
            async def wrapper(*args, **kwargs):
                if not _caching_enabled:
                    return await cached_function(*args, **kwargs)

                cache_key = wrapper.__cache_key__(*args, **kwargs)
                result = wrapper.__memory__.get(cache_key, default=ENOVAL, retry=True)

                if result is ENOVAL:
                    result = await cached_function(*args, **kwargs)
                    wrapper.__memory__.set(cache_key, result, expire, retry=True)

                return result

        else:

            @functools.wraps(cached_function)
            def wrapper(*args, **kwargs):
                if not _caching_enabled:
                    return cached_function(*args, **kwargs)

                cache_key = wrapper.__cache_key__(*args, **kwargs)
                result = wrapper.__memory__.get(cache_key, default=ENOVAL, retry=True)

                if result is ENOVAL:
                    result = cached_function(*args, **kwargs)
                    wrapper.__memory__.set(cache_key, result, expire, retry=True)

                return result

        def __cache_key__(*args, **kwargs):
            """Make key for cache given function arguments."""
            return args_to_key(base, args, kwargs, typed, ignore)

        # `functools.wraps` already set `__wrapped__`; expose the key
        # builder and the cache object for tests and manual invalidation.
        wrapper.__cache_key__ = __cache_key__  # type: ignore
        wrapper.__memory__ = memory  # type: ignore

        return wrapper

    return decorator

clear_cache()

Erase the cache completely.

Source code in outlines/caching.py
def clear_cache():
    """Erase the cache completely."""
    # Drop every entry from the on-disk cache returned by `get_cache`.
    get_cache().clear()

disable_cache()

Disable the cache for this session.

Generative models output different results each time they are called when sampling. This can be a desirable property for some workflows, in which case one can call outlines.caching.disable_cache to disable the cache for the session.

This function does not delete the cache, call outlines.caching.clear_cache instead. It also does not overwrite the cache with the values returned during the session.

Example

outlines.caching.disable_cache should be called right after importing outlines:

import outlines.caching as cache cache.disable_cache()

Source code in outlines/caching.py
def disable_cache():
    """Disable the cache for this session.

    Generative models output different results each time they are called when
    sampling. This can be a desirable property for some workflows, in which
    case one can call `outlines.caching.disable_cache` to disable the cache
    for the session.

    This function does not delete the cache, call
    `outlines.caching.clear_cache` instead. It also does not overwrite the
    cache with the values returned during the session.

    Example
    -------

    `disable_cache` should be called right after importing outlines:

    >>> import outlines.caching as cache
    >>> cache.disable_cache()

    """
    # Flip the module-level switch checked by the `cache` decorator.
    global _caching_enabled
    _caching_enabled = False

get_cache() cached

Get the context object that contains previously-computed return values.

The cache is used to avoid unnecessary computations and API calls, which can be long and expensive for large models.

The cache directory defaults to HOMEDIR/.cache/outlines, but this choice can be overridden by the user by setting the value of the OUTLINES_CACHE_DIR environment variable.

Source code in outlines/caching.py
@functools.lru_cache(1)
def get_cache():
    """Get the context object that contains previously-computed return values.

    The cache is used to avoid unnecessary computations and API calls, which
    can be long and expensive for large models.

    The cache directory defaults to `HOMEDIR/.cache/outlines`, but this choice
    can be overridden by the user by setting the value of the
    `OUTLINES_CACHE_DIR` environment variable.

    """
    from outlines._version import __version__ as outlines_version  # type: ignore

    explicit_dir = os.environ.get("OUTLINES_CACHE_DIR")
    xdg_cache_home = os.environ.get("XDG_CACHE_HOME")
    home_dir = os.path.normpath(os.path.expanduser("~"))

    if explicit_dir:
        # OUTLINES_CACHE_DIR takes precedence
        cache_dir = explicit_dir
    elif xdg_cache_home:  # pragma: no cover
        # NOTE(review): XDG_CACHE_HOME is itself the cache root per the XDG
        # spec, so the extra ".cache" level looks redundant — confirm before
        # changing, since changing it would relocate existing caches.
        cache_dir = os.path.join(xdg_cache_home, ".cache", "outlines")
    elif home_dir != "/": # pragma: no cover
        cache_dir = os.path.join(home_dir, ".cache", "outlines")
    else:  # pragma: no cover
        # home_dir may be / inside a docker container without existing user
        cache_dir = os.path.join(tempfile.gettempdir(), ".cache", "outlines")

    memory = Cache(
        cache_dir,
        eviction_policy="none",
        cull_limit=0,
        disk=CloudpickleDisk,
    )

    # ensure if version upgrade occurs, old cache is pruned
    if outlines_version != memory.get("__version__"):
        memory.clear()
    memory["__version__"] = outlines_version

    return memory

generator

Encapsulate a model and an output type into a reusable object.

AsyncBlackBoxGenerator

Asynchronous generator for which we don't control constrained generation.

The output type provided is not compiled into a logits processor, but is instead directly passed on to the model.

Source code in outlines/generator.py
class AsyncBlackBoxGenerator:
    """Asynchronous generator for which we don't control constrained
    generation.

    The output type provided is not compiled into a logits processor, but is
    instead directly passed on to the model.

    """
    output_type: Optional[Any]

    def __init__(self, model: AsyncBlackBoxModel, output_type: Optional[Any]):
        """
        Parameters
        ----------
        model
            An instance of an Outlines model.
        output_type
            The output type that will be used to constrain the generation.

        """
        self.output_type = output_type
        self.model = model

    async def __call__(self, prompt: Any, **inference_kwargs) -> Any:
        """Generate a single response from the model.

        Parameters
        ----------
        prompt
            The prompt based on which the model generates its response.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        return await self.model.generate(
            prompt, self.output_type, **inference_kwargs
        )

    async def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:
        """Generate one response per prompt.

        Parameters
        ----------
        prompts
            The prompts for which responses will be generated.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        List[Any]
            The responses generated by the model, one per prompt.

        """
        return await self.model.generate_batch(
            prompts, self.output_type, **inference_kwargs
        )

    async def stream(self, prompt: Any, **inference_kwargs) -> AsyncIterator[Any]:
        """Stream the model's response chunk by chunk.

        Parameters
        ----------
        prompt
            The prompt based on which the model generates its response.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        AsyncIterator[Any]
            An async iterator over the chunks of the response.

        """
        async for piece in self.model.generate_stream(  # pragma: no cover
            prompt, self.output_type, **inference_kwargs
        ):
            yield piece

__call__(prompt, **inference_kwargs) async

Generate a response from the model.

Parameters:

Name Type Description Default
prompt Any

The prompt to use to generate a response.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Any

The response generated by the model.

Source code in outlines/generator.py
async def __call__(self, prompt: Any, **inference_kwargs) -> Any:
    """Generate a single response from the model.

    Parameters
    ----------
    prompt
        The prompt based on which the model generates its response.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    return await self.model.generate(
        prompt, self.output_type, **inference_kwargs
    )

__init__(model, output_type)

Parameters:

Name Type Description Default
model AsyncBlackBoxModel

An instance of an Outlines model.

required
output_type Optional[Any]

The output type that will be used to constrain the generation.

required
Source code in outlines/generator.py
def __init__(self, model: AsyncBlackBoxModel, output_type: Optional[Any]):
    """
    Parameters
    ----------
    model
        An instance of an Outlines model.
    output_type
        The output type that will be used to constrain the generation.

    """
    self.output_type = output_type
    self.model = model

batch(prompts, **inference_kwargs) async

Generate a batch of responses from the model.

Parameters:

Name Type Description Default
prompts List[Any]

The list of prompts to use to generate a batch of responses.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
List[Any]

The list of responses generated by the model.

Source code in outlines/generator.py
async def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:
    """Generate one response per prompt.

    Parameters
    ----------
    prompts
        The prompts for which responses will be generated.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    List[Any]
        The responses generated by the model, one per prompt.

    """
    return await self.model.generate_batch(
        prompts, self.output_type, **inference_kwargs
    )

stream(prompt, **inference_kwargs) async

Generate a stream of responses from the model.

Parameters:

Name Type Description Default
prompt Any

The prompt to use to generate a response.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Any

The response generated by the model.

Source code in outlines/generator.py
async def stream(self, prompt: Any, **inference_kwargs) -> AsyncIterator[Any]:
    """Stream the model's response chunk by chunk.

    Parameters
    ----------
    prompt
        The prompt based on which the model generates its response.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    AsyncIterator[Any]
        An async iterator over the chunks of the response.

    """
    async for piece in self.model.generate_stream(  # pragma: no cover
        prompt, self.output_type, **inference_kwargs
    ):
        yield piece

BlackBoxGenerator

Synchronous generator for which we don't control constrained generation.

The output type provided is not compiled into a logits processor, but is instead directly passed on to the model.

Source code in outlines/generator.py
class BlackBoxGenerator:
    """Synchronous generator for which we don't control constrained
    generation.

    The output type provided is not compiled into a logits processor, but is
    instead directly passed on to the model.

    """
    output_type: Optional[Any]

    def __init__(self, model: BlackBoxModel, output_type: Optional[Any]):
        """
        Parameters
        ----------
        model
            An instance of an Outlines model.
        output_type
            The output type that will be used to constrain the generation.

        """
        self.output_type = output_type
        self.model = model

    def __call__(self, prompt: Any, **inference_kwargs) -> Any:
        """Generate a single response from the model.

        Parameters
        ----------
        prompt
            The prompt based on which the model generates its response.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        return self.model.generate(
            prompt, self.output_type, **inference_kwargs
        )

    def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:
        """Generate one response per prompt.

        Parameters
        ----------
        prompts
            The prompts for which responses will be generated.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        List[Any]
            The responses generated by the model, one per prompt.

        """
        return self.model.generate_batch(
            prompts, self.output_type, **inference_kwargs
        )

    def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
        """Stream the model's response chunk by chunk.

        Parameters
        ----------
        prompt
            The prompt based on which the model generates its response.
        **inference_kwargs
            Additional keyword arguments forwarded to the model.

        Returns
        -------
        Iterator[Any]
            An iterator over the chunks of the response.

        """
        return self.model.generate_stream(
            prompt, self.output_type, **inference_kwargs
        )

__call__(prompt, **inference_kwargs)

Generate a response from the model.

Parameters:

Name Type Description Default
prompt Any

The prompt to use to generate a response.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Any

The response generated by the model.

Source code in outlines/generator.py
def __call__(self, prompt: Any, **inference_kwargs) -> Any:
    """Generate a single response from the model.

    Parameters
    ----------
    prompt
        The prompt based on which the model generates its response.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    return self.model.generate(
        prompt, self.output_type, **inference_kwargs
    )

__init__(model, output_type)

Parameters:

Name Type Description Default
model BlackBoxModel

An instance of an Outlines model.

required
output_type Optional[Any]

The output type that will be used to constrain the generation.

required
Source code in outlines/generator.py
def __init__(self, model: BlackBoxModel, output_type: Optional[Any]):
    """
    Parameters
    ----------
    model
        An instance of an Outlines model.
    output_type
        The output type that will be used to constrain the generation.

    """
    self.output_type = output_type
    self.model = model

batch(prompts, **inference_kwargs)

Generate a batch of responses from the model.

Parameters:

Name Type Description Default
prompts List[Any]

The list of prompts to use to generate a batch of responses.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
List[Any]

The list of responses generated by the model.

Source code in outlines/generator.py
def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:
    """Generate one response per prompt.

    Parameters
    ----------
    prompts
        The prompts for which responses will be generated.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    List[Any]
        The responses generated by the model, one per prompt.

    """
    return self.model.generate_batch(
        prompts, self.output_type, **inference_kwargs
    )

stream(prompt, **inference_kwargs)

Generate a stream of responses from the model.

Parameters:

Name Type Description Default
prompt Any

The prompt to use to generate a response.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Any

The response generated by the model.

Source code in outlines/generator.py
def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
    """Stream the model's response chunk by chunk.

    Parameters
    ----------
    prompt
        The prompt based on which the model generates its response.
    **inference_kwargs
        Additional keyword arguments forwarded to the model.

    Returns
    -------
    Iterator[Any]
        An iterator over the chunks of the response.

    """
    return self.model.generate_stream(
        prompt, self.output_type, **inference_kwargs
    )

SteerableGenerator

Represents a generator for which we control constrained generation.

The generator is responsible for building and storing the logits processor (which can be quite expensive to build), and then passing it to the model when the generator is called.

The argument defining constrained generation can be of 2 types associated to different methods to create an instance of the generator: - output_type (through __init__): an output type as defined in the outlines.types module - processor (through from_processor): an already built logits processor as defined in the outlines.processors module

The 2 parameters are mutually exclusive.

Source code in outlines/generator.py
class SteerableGenerator:
    """Generator for models whose constrained generation we control.

    Building a logits processor can be quite expensive, so the generator
    constructs it once, keeps it, and hands it to the model on every
    invocation.

    An instance can be created in two mutually exclusive ways, depending on
    how the constraint is expressed:
    - `output_type` (through `__init__`): an output type as defined in the
      `outlines.types` module
    - `processor` (through `from_processor`): an already built logits processor
       as defined in the `outlines.processors` module

    """
    logits_processor: Optional[LogitsProcessorType]

    def __init__(
        self,
        model: SteerableModel,
        output_type: Optional[Any],
        backend_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        model
            An instance of an Outlines model.
        output_type
            The output type expressed as a Python type
        backend_name
            The name of the backend to use to create the logits processor.

        """
        self.model = model
        if output_type is None:
            # Unconstrained generation: nothing to build.
            self.logits_processor = None
            return

        term = python_types_to_terms(output_type)
        # Dispatch on the kind of term: grammar, JSON schema, or regex.
        if isinstance(term, CFG):
            self.logits_processor = get_cfg_logits_processor(
                backend_name, model, term.definition,
            )
        elif isinstance(term, JsonSchema):
            self.logits_processor = get_json_schema_logits_processor(
                backend_name, model, term.schema,
            )
        else:
            self.logits_processor = get_regex_logits_processor(
                backend_name, model, to_regex(term),
            )

    @classmethod
    def from_processor(
        cls, model: SteerableModel, processor: LogitsProcessorType
    ):
        """Create a generator from a logits processor.

        Parameters
        ----------
        model
            An instance of an Outlines model.
        processor
            An instance of a logits processor.

        """
        # Bypass __init__: the logits processor is already built.
        generator = cls.__new__(cls)
        generator.model = model
        generator.logits_processor = processor
        return generator

    def _reset_processor(self) -> None:
        """Reset the stored logits processor, if any, before a new run."""
        if self.logits_processor is not None:
            self.logits_processor.reset()

    def __call__(self, prompt: Any, **inference_kwargs) -> Any:
        """Generate a response from the model.

        Parameters
        ----------
        prompt
            The prompt to use to generate a response.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        self._reset_processor()
        return self.model.generate(
            prompt, self.logits_processor, **inference_kwargs
        )

    def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:
        """Generate a batch of responses from the model.

        Parameters
        ----------
        prompts
            The list of prompts to use to generate a batch of responses.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        List[Any]
            The list of responses generated by the model.

        """
        self._reset_processor()
        return self.model.generate_batch(
            prompts, self.logits_processor, **inference_kwargs
        )

    def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
        """Generate a stream of responses from the model.

        Parameters
        ----------
        prompt
            The prompt to use to generate a response.
        **inference_kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        Any
            The response generated by the model.

        """
        self._reset_processor()
        return self.model.generate_stream(
            prompt, self.logits_processor, **inference_kwargs
        )

__call__(prompt, **inference_kwargs)

Generate a response from the model.

Parameters:

Name Type Description Default
prompt Any

The prompt to use to generate a response.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Any

The response generated by the model.

Source code in outlines/generator.py
def __call__(self, prompt: Any, **inference_kwargs) -> Any:
    """Generate a response from the model.

    Parameters
    ----------
    prompt
        The prompt to use to generate a response.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    if self.logits_processor is not None:
        self.logits_processor.reset()
    return self.model.generate(
        prompt, self.logits_processor, **inference_kwargs
    )

__init__(model, output_type, backend_name=None)

Parameters:

Name Type Description Default
model SteerableModel

An instance of an Outlines model.

required
output_type Optional[Any]

The output type expressed as a Python type

required
backend_name Optional[str]

The name of the backend to use to create the logits processor.

None
Source code in outlines/generator.py
def __init__(
    self,
    model: SteerableModel,
    output_type: Optional[Any],
    backend_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    model
        An instance of an Outlines model.
    output_type
        The output type expressed as a Python type
    backend_name
        The name of the backend to use to create the logits processor.

    """
    self.model = model
    if output_type is None:
        # Unconstrained generation: nothing to build.
        self.logits_processor = None
        return

    term = python_types_to_terms(output_type)
    # Dispatch on the kind of term: grammar, JSON schema, or regex.
    if isinstance(term, CFG):
        self.logits_processor = get_cfg_logits_processor(
            backend_name, model, term.definition,
        )
    elif isinstance(term, JsonSchema):
        self.logits_processor = get_json_schema_logits_processor(
            backend_name, model, term.schema,
        )
    else:
        self.logits_processor = get_regex_logits_processor(
            backend_name, model, to_regex(term),
        )

batch(prompts, **inference_kwargs)

Generate a batch of responses from the model.

Parameters:

Name Type Description Default
prompts List[Any]

The list of prompts to use to generate a batch of responses.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
List[Any]

The list of responses generated by the model.

Source code in outlines/generator.py
def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:
    """Generate a batch of responses from the model.

    The stored logits processor, when present, is reset so the batch
    starts from a clean constraint state.

    Parameters
    ----------
    prompts
        The list of prompts to use to generate a batch of responses.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    List[Any]
        The list of responses generated by the model.

    """
    processor = self.logits_processor
    if processor is not None:
        processor.reset()
    return self.model.generate_batch(prompts, processor, **inference_kwargs)

from_processor(model, processor) classmethod

Create a generator from a logits processor.

Parameters:

Name Type Description Default
model SteerableModel

An instance of an Outlines model.

required
processor LogitsProcessorType

An instance of a logits processor.

required
Source code in outlines/generator.py
@classmethod
def from_processor(
    cls, model: SteerableModel, processor: LogitsProcessorType
):
    """Create a generator from a logits processor.

    Bypasses `__init__` because the logits processor is already built.

    Parameters
    ----------
    model
        An instance of an Outlines model.
    processor
        An instance of a logits processor.

    """
    generator = cls.__new__(cls)
    generator.model = model
    generator.logits_processor = processor
    return generator

stream(prompt, **inference_kwargs)

Generate a stream of responses from the model.

Parameters:

Name Type Description Default
prompt Any

The prompt to use to generate a response.

required
**inference_kwargs

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
Any

The response generated by the model.

Source code in outlines/generator.py
def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
    """Generate a stream of responses from the model.

    The stored logits processor, when present, is reset so the run starts
    from a clean constraint state.

    Parameters
    ----------
    prompt
        The prompt to use to generate a response.
    **inference_kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    Any
        The response generated by the model.

    """
    processor = self.logits_processor
    if processor is not None:
        processor.reset()
    return self.model.generate_stream(prompt, processor, **inference_kwargs)

Generator(model, output_type=None, backend=None, *, processor=None)

Create a generator for the given model and output parameters.

The 2 parameters output_type and processor are mutually exclusive. The parameter processor is only supported for SteerableModel instances (typically local models) and is intended to be used only by advanced users.

Parameters:

Name Type Description Default
model Union[Model, AsyncModel]

An instance of an Outlines model.

required
output_type Optional[Any]

The output type expressed as a Python type or a type defined in the outlines.types.dsl module.

None
backend Optional[str]

The name of the backend to use to create the logits processor. Only used for steerable models if there is an output type and processor is not provided.

None
processor Optional[LogitsProcessorType]

An instance of a logits processor.

None

Returns:

Type Description
Union[SteerableGenerator, BlackBoxGenerator, AsyncBlackBoxGenerator]

A generator instance.

Source code in outlines/generator.py
def Generator(
    model: Union[Model, AsyncModel],
    output_type: Optional[Any] = None,
    backend: Optional[str] = None,
    *,
    processor: Optional[LogitsProcessorType] = None,
) -> Union[SteerableGenerator, BlackBoxGenerator, AsyncBlackBoxGenerator]:
    """Create a generator for the given model and output parameters.

    The parameters `output_type` and `processor` are mutually exclusive.
    `processor` is only supported for SteerableModel instances (typically
    local models) and is intended to be used only by advanced users.

    Parameters
    ----------
    model
        An instance of an Outlines model.
    output_type
        The output type expressed as a Python type or a type defined in the
        outlines.types.dsl module.
    backend
        The name of the backend to use to create the logits processor. Only
        used for steerable models if there is an output type and `processor` is
        not provided.
    processor
        An instance of a logits processor.

    Returns
    -------
    Union[SteerableGenerator, BlackBoxGenerator, AsyncBlackBoxGenerator]
        A generator instance.

    """
    # The two ways of defining the output constraint cannot be combined.
    if output_type is not None and processor is not None:
        raise ValueError(
            "At most one of output_type or processor can be provided"
        )

    if isinstance(model, SteerableModel): # type: ignore
        if processor is not None:
            return SteerableGenerator.from_processor(model, processor) # type: ignore
        return SteerableGenerator(model, output_type, backend) # type: ignore

    # Black-box (remote) models cannot take a raw logits processor.
    if processor is not None:
        raise NotImplementedError(
            "This model does not support logits processors"
        )
    if isinstance(model, AsyncBlackBoxModel): # type: ignore
        return AsyncBlackBoxGenerator(model, output_type) # type: ignore
    if isinstance(model, BlackBoxModel): # type: ignore
        return BlackBoxGenerator(model, output_type) # type: ignore
    raise ValueError(
        "The model argument must be an instance of "
        "SteerableModel, BlackBoxModel or AsyncBlackBoxModel"
    )

grammars

A few common Lark grammars.

read_grammar(grammar_file_name, base_grammar_path=GRAMMAR_PATH)

Read grammar file from default grammar path.

Parameters:

Name Type Description Default
grammar_file_name str

The name of the grammar file to read.

required
base_grammar_path Path

The path to the directory containing the grammar file.

GRAMMAR_PATH

Returns:

Type Description
str

The contents of the grammar file.

Source code in outlines/grammars.py
def read_grammar(
    grammar_file_name: str,
    base_grammar_path: Path = GRAMMAR_PATH,
) -> str:
    """Read grammar file from default grammar path.

    Parameters
    ----------
    grammar_file_name
        The name of the grammar file to read.
    base_grammar_path
        The path to the directory containing the grammar file.

    Returns
    -------
    str
        The contents of the grammar file.

    """
    # Decode explicitly as UTF-8: the bare open() used previously relied on
    # the platform's locale default encoding, which can misread the bundled
    # grammar files on non-UTF-8 systems.
    full_path = base_grammar_path / grammar_file_name
    return full_path.read_text(encoding="utf-8")

inputs

Contain classes used to define the inputs of a model.

Audio dataclass

Contains an audio that can be passed to a multimodal model.

Provide one or several instances of this class along with a text prompt in a list as the model_input argument to a model that supports audio processing.

Parameters:

Name Type Description Default
audio Any

The audio to use in the text generation.

required
Source code in outlines/inputs.py
@dataclass
class Audio:
    """Audio asset that can be passed to a multimodal model.

    Pass one or several instances of this class, together with a text
    prompt, in a list given as the `model_input` argument to a model that
    supports audio processing.

    Parameters
    ----------
    audio
        The audio to use in the text generation.

    """
    audio: Any

Chat dataclass

Contains the input for a chat model.

Provide an instance of this class as the model_input argument to a model that supports chat.

Each message contained in the messages list must be a dict with 'role' and 'content' keys. The role can be 'user', 'assistant', or 'system'. The content supports either: - a text string, - a list containing text and assets (e.g., ["Describe...", Image(...)]), - only for HuggingFace transformers models, a list of dict items with explicit types (e.g., [{"type": "text", "text": "Describe..."}, {"type": "image", "image": Image(...)}])

Examples:

# Initialize the chat with a system message.
chat_prompt = Chat([
    {"role": "system", "content": "You are a helpful assistant."},
])

# Add a user message with an image and call the model (not shown here).
chat_prompt.add_user_message(["Describe the image below", Image(image)])

# Add as an assistant message the response from the model.
chat_prompt.add_assistant_message("There is a black cat sitting on a couch.")

Parameters:

Name Type Description Default
messages List[Dict[str, Any]]

The list of messages that will be provided to the model.

None
Source code in outlines/inputs.py
@dataclass
class Chat:
    """Contains the input for a chat model.

    Provide an instance of this class as the `model_input` argument to a model
    that supports chat.

    Each message contained in the messages list must be a dict with 'role' and
    'content' keys. The role can be 'user', 'assistant', or 'system'. The content
    supports either:
    - a text string,
    - a list containing text and assets (e.g., ["Describe...", Image(...)]),
    - only for HuggingFace transformers models, a list of dict items with explicit types (e.g.,
      [{"type": "text", "text": "Describe..."}, {"type": "image", "image": Image(...)}])

    Examples
    --------
    ```python
    # Initialize the chat with a system message.
    chat_prompt = Chat([
        {"role": "system", "content": "You are a helpful assistant."},
    ])

    # Add a user message with an image and call the model (not shown here).
    chat_prompt.add_user_message(["Describe the image below", Image(image)])

    # Add as an assistant message the response from the model.
    chat_prompt.add_assistant_message("There is a black cat sitting on a couch.")
    ```

    Parameters
    ----------
    messages
        The list of messages that will be provided to the model.

    """
    messages: List[Dict[str, Any]] = None # type: ignore

    def __post_init__(self):
        if self.messages is None:
            self.messages = []

    def append(self, message: Dict[str, Any]):
        """Add a message to the chat.

        Parameters
        ----------
        message
            The message to add to the chat.

        """
        self.messages.append(message)

    def extend(self, messages: List[Dict[str, Any]]):
        """Add a list of messages to the chat.

        Parameters
        ----------
        messages
            The list of messages to add to the chat.

        """
        self.messages.extend(messages)

    def pop(self) -> Dict[str, Any]:
        """Remove the last message from the chat.

        Returns
        -------
        message
            The removed message.

        """
        return self.messages.pop()

    def add_system_message(self, content: str | List[Any]):
        """Add a system message to the chat.

        Parameters
        ----------
        content
            The content of the system message.

        """
        self.messages.append({"role": "system", "content": content})

    def add_user_message(self, content: str | List[Any]):
        """Add a user message to the chat.

        Parameters
        ----------
        content
            The content of the user message.

        """
        self.messages.append({"role": "user", "content": content})

    def add_assistant_message(self, content: str | List[Any]):
        """Add an assistant message to the chat.

        Parameters
        ----------
        content
            The content of the assistant message.

        """
        self.messages.append({"role": "assistant", "content": content})

    def __str__(self):
        return "\n".join(str(message) for message in self.messages)

    def __repr__(self):
        return f"Chat(messages={self.messages})"

add_assistant_message(content)

Add an assistant message to the chat.

Parameters:

Name Type Description Default
content str | List[Any]

The content of the assistant message.

required
Source code in outlines/inputs.py
def add_assistant_message(self, content: str | List[Any]):
    """Add an assistant message to the chat.

    Parameters
    ----------
    content
        The content of the assistant message.

    """
    self.messages.append({"role": "assistant", "content": content})

add_system_message(content)

Add a system message to the chat.

Parameters:

Name Type Description Default
content str | List[Any]

The content of the system message.

required
Source code in outlines/inputs.py
def add_system_message(self, content: str | List[Any]):
    """Add a system message to the chat.

    Parameters
    ----------
    content
        The content of the system message.

    """
    self.messages.append({"role": "system", "content": content})

add_user_message(content)

Add a user message to the chat.

Parameters:

Name Type Description Default
content str | List[Any]

The content of the user message.

required
Source code in outlines/inputs.py
def add_user_message(self, content: str | List[Any]):
    """Add a user message to the chat.

    Parameters
    ----------
    content
        The content of the user message.

    """
    self.messages.append({"role": "user", "content": content})

append(message)

Add a message to the chat.

Parameters:

Name Type Description Default
message Dict[str, Any]

The message to add to the chat.

required
Source code in outlines/inputs.py
def append(self, message: Dict[str, Any]):
    """Append one message to the conversation history.

    Parameters
    ----------
    message
        The message to add to the chat.

    """
    self.messages += [message]

extend(messages)

Add a list of messages to the chat.

Parameters:

Name Type Description Default
messages List[Dict[str, Any]]

The list of messages to add to the chat.

required
Source code in outlines/inputs.py
def extend(self, messages: List[Dict[str, Any]]):
    """Append several messages to the conversation history at once.

    Parameters
    ----------
    messages
        The list of messages to add to the chat.

    """
    for message in messages:
        self.messages.append(message)

pop()

Remove the last message from the chat.

Returns:

Type Description
message

The removed message.

Source code in outlines/inputs.py
def pop(self) -> Dict[str, Any]:
    """Remove and return the most recent message in the chat.

    Returns
    -------
    message
        The removed message.

    """
    last_message = self.messages.pop()
    return last_message

Image dataclass

Contains an image that can be passed to a multimodal model.

Provide one or several instances of this class along with a text prompt in a list as the model_input argument to a model that supports vision.

Parameters:

Name Type Description Default
image Image

The image to use in the text generation.

required
Source code in outlines/inputs.py
@dataclass
class Image:
    """Contains an image that can be passed to a multimodal model.

    Provide one or several instances of this class along with a text prompt
    in a list as the `model_input` argument to a model that supports vision.

    On construction, the image is serialized into a base64 string
    (`image_str`) and its MIME type is recorded (`image_format`).

    Parameters
    ----------
    image
        The image to use in the text generation.

    """
    image: PILImage.Image

    def __post_init__(self):
        # Without a format we cannot serialize the image for the model.
        if not self.image.format:
            raise TypeError(
                "Could not read the format of the image passed to the model."
            )

        buffer = BytesIO()
        self.image.save(buffer, format=self.image.format)
        raw_bytes = buffer.getvalue()
        self.image_str = base64.b64encode(raw_bytes).decode("utf-8")
        self.image_format = f"image/{self.image.format.lower()}"

Video dataclass

Contains a video that can be passed to a multimodal model.

Provide one or several instances of this class along with a text prompt in a list as the model_input argument to a model that supports video processing.

Parameters:

Name Type Description Default
video Any

The video to use in the text generation.

required
Source code in outlines/inputs.py
@dataclass
class Video:
    """Video asset that can be passed to a multimodal model.

    Pass one or several instances of this class, together with a text
    prompt, in a list given as the `model_input` argument to a model that
    supports video processing.

    Parameters
    ----------
    video
        The video to use in the text generation.

    """
    video: Any

models

Module that contains all the models integrated in outlines.

We group the models in submodules by provider instead of theme (completion, chat completion, diffusers, etc.) and use routing functions everywhere else in the codebase.

Anthropic

Bases: Model

Thin wrapper around the anthropic.Anthropic client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the anthropic.Anthropic client.

Source code in outlines/models/anthropic.py
class Anthropic(Model):
    """Thin wrapper around the `anthropic.Anthropic` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `anthropic.Anthropic` client.

    """
    def __init__(
        self, client: "AnthropicClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            An `anthropic.Anthropic` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = AnthropicTypeAdapter()

    def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The response generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        # Fall back to the model name given at initialization when the
        # caller did not pass one explicitly.
        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        completion = self.client.messages.create(
            **messages,
            **inference_kwargs,
        )
        return completion.content[0].text

    def generate_batch(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ):
        """Generate a batch of responses using Anthropic.

        Batch generation is not implemented for the Anthropic model, so this
        method always raises.

        Parameters
        ----------
        model_input
            The prompts based on which the model would generate responses;
            unused.
        output_type
            The expected output type; unused.
        **inference_kwargs
            Additional keyword arguments; unused.

        Raises
        ------
        NotImplementedError
            Always raised, as batch generation is not supported.

        """
        raise NotImplementedError(
            "Anthropic does not support batch generation."
        )

    def generate_stream(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Iterator[str]:
        """Stream text using Anthropic.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            As structured generation is not supported by Anthropic, the value
            of this argument must be `None`. Otherwise, an error will be
            raised at runtime.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Iterator[str]
            An iterator that yields the text generated by the model.

        """
        messages = self.type_adapter.format_input(model_input)

        if output_type is not None:
            raise NotImplementedError(
                f"The type {output_type} is not available with Anthropic."
            )

        if (
            "model" not in inference_kwargs
            and self.model_name is not None
        ):
            inference_kwargs["model"] = self.model_name

        stream = self.client.messages.create(
            **messages,
            stream=True,
            **inference_kwargs,
        )

        # Only text deltas carry generated text; other stream events
        # (message start/stop, block start, etc.) are skipped.
        for chunk in stream:
            if (
                chunk.type == "content_block_delta"
                and chunk.delta.type == "text_delta"
            ):
                yield chunk.delta.text

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Anthropic

An anthropic.Anthropic client.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/anthropic.py
def __init__(
    self, client: "AnthropicClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client
        An `anthropic.Anthropic` client.
    model_name
        The name of the model to use.

    """
    # The type adapter translates Outlines inputs into Anthropic arguments.
    self.type_adapter = AnthropicTypeAdapter()
    self.client = client
    self.model_name = model_name

generate(model_input, output_type=None, **inference_kwargs)

Generate text using Anthropic.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

As structured generation is not supported by Anthropic, the value of this argument must be None. Otherwise, an error will be raised at runtime.

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The response generated by the model.

Source code in outlines/models/anthropic.py
def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The response generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    # Fall back to the model name given at initialization when the caller
    # did not pass one explicitly.
    if self.model_name is not None:
        inference_kwargs.setdefault("model", self.model_name)

    response = self.client.messages.create(**messages, **inference_kwargs)
    return response.content[0].text

generate_stream(model_input, output_type=None, **inference_kwargs)

Stream text using Anthropic.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

As structured generation is not supported by Anthropic, the value of this argument must be None. Otherwise, an error will be raised at runtime.

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Iterator[str]

An iterator that yields the text generated by the model.

Source code in outlines/models/anthropic.py
def generate_stream(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Iterator[str]:
    """Stream text using Anthropic.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        As structured generation is not supported by Anthropic, the value
        of this argument must be `None`. Otherwise, an error will be
        raised at runtime.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Iterator[str]
        An iterator that yields the text generated by the model.

    """
    messages = self.type_adapter.format_input(model_input)

    if output_type is not None:
        raise NotImplementedError(
            f"The type {output_type} is not available with Anthropic."
        )

    # Fall back to the model name given at initialization when the caller
    # did not pass one explicitly.
    if self.model_name is not None:
        inference_kwargs.setdefault("model", self.model_name)

    event_stream = self.client.messages.create(
        stream=True,
        **messages,
        **inference_kwargs,
    )

    # Only text deltas carry generated text; other stream events are
    # skipped.
    for event in event_stream:
        if event.type != "content_block_delta":
            continue
        if event.delta.type == "text_delta":
            yield event.delta.text

AsyncLMStudio

Bases: AsyncModel

Thin wrapper around a lmstudio.AsyncClient client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the LMStudio async client.

Source code in outlines/models/lmstudio.py
class AsyncLMStudio(AsyncModel):
    """Thin wrapper around a `lmstudio.AsyncClient` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the LMStudio async client.

    """

    def __init__(
        self, client: "AsyncClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client
            A LMStudio AsyncClient instance.
        model_name
            The name of the model to use. If not provided, uses the default
            loaded model in LMStudio.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = LMStudioTypeAdapter()
        # Whether `client.__aenter__` has been awaited; set lazily on the
        # first generation call and reset by `close`.
        self._context_entered = False

    async def _enter_context(self) -> None:
        # Enter the client's async context exactly once, on first use.
        if not self._context_entered:
            await self.client.__aenter__()
            self._context_entered = True

    async def _resolve_model(self, kwargs: dict) -> Any:
        # Pop the model name (an explicit kwarg wins over the instance
        # default) and fetch the corresponding model handle from the client.
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name
        model_key = kwargs.pop("model", None)
        if model_key:
            return await self.client.llm.model(model_key)
        return await self.client.llm.model()

    async def close(self) -> None:
        """Close the async client and release resources."""
        if not self._context_entered:
            return
        await self.client.__aexit__(None, None, None)
        self._context_entered = False

    async def generate(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using LMStudio asynchronously.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        str
            The text generated by the model.

        """
        await self._enter_context()
        model = await self._resolve_model(kwargs)

        prompt = self.type_adapter.format_input(model_input)
        schema = self.type_adapter.format_output_type(output_type)
        if schema is not None:
            kwargs["response_format"] = schema

        response = await model.respond(prompt, **kwargs)
        return response.content

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `lmstudio` library does not support batch inference."
        )

    async def generate_stream(  # type: ignore
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using LMStudio asynchronously.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the model.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        await self._enter_context()
        model = await self._resolve_model(kwargs)

        prompt = self.type_adapter.format_input(model_input)
        schema = self.type_adapter.format_output_type(output_type)
        if schema is not None:
            kwargs["response_format"] = schema

        stream = await model.respond_stream(prompt, **kwargs)
        async for fragment in stream:
            yield fragment.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client AsyncClient

A LMStudio AsyncClient instance.

required
model_name Optional[str]

The name of the model to use. If not provided, uses the default loaded model in LMStudio.

None
Source code in outlines/models/lmstudio.py
def __init__(
    self, client: "AsyncClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client
        A LMStudio AsyncClient instance.
    model_name
        The name of the model to use. If not provided, uses the default
        loaded model in LMStudio.

    """
    # Adapter that translates high-level inputs/outputs into LMStudio
    # arguments.
    self.type_adapter = LMStudioTypeAdapter()
    self.client = client
    self.model_name = model_name
    # Tracks whether the client's async context has been entered yet.
    self._context_entered = False

close() async

Close the async client and release resources.

Source code in outlines/models/lmstudio.py
async def close(self) -> None:
    """Close the async client and release resources."""
    # Nothing to do if the client context was never entered.
    if not self._context_entered:
        return
    await self.client.__aexit__(None, None, None)
    self._context_entered = False

generate(model_input, output_type=None, **kwargs) async

Generate text using LMStudio asynchronously.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/lmstudio.py
async def generate(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using LMStudio asynchronously.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    str
        The text generated by the model.

    """
    # Lazily enter the client's async context on first use.
    if not self._context_entered:
        await self.client.__aenter__()
        self._context_entered = True

    # An explicit "model" kwarg wins over the instance default.
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    if model_key:
        model = await self.client.llm.model(model_key)
    else:
        model = await self.client.llm.model()

    prompt = self.type_adapter.format_input(model_input)
    schema = self.type_adapter.format_output_type(output_type)
    if schema is not None:
        kwargs["response_format"] = schema

    response = await model.respond(prompt, **kwargs)
    return response.content

generate_stream(model_input, output_type=None, **kwargs) async

Stream text using LMStudio asynchronously.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the model.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/lmstudio.py
async def generate_stream(  # type: ignore
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using LMStudio asynchronously.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the model.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    # Lazily enter the client's async context on first use.
    if not self._context_entered:
        await self.client.__aenter__()
        self._context_entered = True

    # An explicit "model" kwarg wins over the instance default.
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    model_key = kwargs.pop("model", None)
    if model_key:
        model = await self.client.llm.model(model_key)
    else:
        model = await self.client.llm.model()

    prompt = self.type_adapter.format_input(model_input)
    schema = self.type_adapter.format_output_type(output_type)
    if schema is not None:
        kwargs["response_format"] = schema

    stream = await model.respond_stream(prompt, **kwargs)
    async for fragment in stream:
        yield fragment.content

AsyncMistral

Bases: AsyncModel

Async thin wrapper around the mistralai.Mistral client.

Converts input and output types to arguments for the mistralai.Mistral client's async methods (chat.complete_async or chat.stream_async).

Source code in outlines/models/mistral.py
class AsyncMistral(AsyncModel):
    """Async thin wrapper around the `mistralai.Mistral` client.

    Converts input and output types to arguments for the `mistralai.Mistral`
    client's async methods (`chat.complete_async` or `chat.stream_async`).

    """

    def __init__(
        self, client: "MistralClient", model_name: Optional[str] = None
    ):
        """
        Parameters
        ----------
        client : MistralClient
            A mistralai.Mistral client instance.
        model_name : Optional[str]
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = MistralTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate a response from the model asynchronously.

        Parameters
        ----------
        model_input : Union[Chat, list, str]
            The prompt or chat messages to generate a response from.
        output_type : Optional[Any]
            The desired format of the response (e.g., JSON schema).
        **inference_kwargs : Any
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The response generated by the model as text. A list is returned
            when the API returns several choices.

        Raises
        ------
        TypeError
            If the schema derived from `output_type` is rejected by Mistral.
        RuntimeError
            For any other error raised by the Mistral API.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            result = await self.client.chat.complete_async(
                messages=messages,
                response_format=response_format,
                stream=False,
                **inference_kwargs,
            )
        except Exception as e:
            # "schema" is a substring of "json_schema", so a single test
            # covers both error messages; chain with `from e` so the
            # original cause is preserved (consistent with the branch below).
            if "schema" in str(e).lower():
                raise TypeError(
                    f"Mistral does not support your schema: {e}. "
                    "Try a local model or dottxt instead."
                ) from e
            else:
                raise RuntimeError(f"Mistral API error: {e}") from e

        outputs = [choice.message for choice in result.choices]
        contents = [message.content for message in outputs]

        # Unwrap the single-choice case for convenience.
        if len(contents) == 1:
            return contents[0]
        return contents

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The mistralai library does not support batch inference."
        )

    async def generate_stream(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        """Generate text from the model as an async stream of chunks.

        Parameters
        ----------
        model_input
            str, list, or chat input to generate from.
        output_type
            Optional type for structured output.
        **inference_kwargs
            Extra kwargs like "model" name.

        Yields
        ------
        str
            Chunks of text as they are streamed.

        Raises
        ------
        TypeError
            If the schema derived from `output_type` is rejected by Mistral.
        RuntimeError
            For any other error raised by the Mistral API.

        """
        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            response = await self.client.chat.stream_async(
                messages=messages,
                response_format=response_format,
                **inference_kwargs
            )
        except Exception as e:
            # Same error translation as `generate`; `from e` keeps the
            # original exception as the cause.
            if "schema" in str(e).lower():
                raise TypeError(
                    f"Mistral does not support your schema: {e}. "
                    "Try a local model or dottxt instead."
                ) from e
            else:
                raise RuntimeError(f"Mistral API error: {e}") from e

        async for chunk in response:
            # Only yield chunks that actually carry new text content.
            if (
                hasattr(chunk, "data")
                and chunk.data.choices
                and len(chunk.data.choices) > 0
                and hasattr(chunk.data.choices[0], "delta")
                and chunk.data.choices[0].delta.content is not None
            ):
                yield chunk.data.choices[0].delta.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Mistral

A mistralai.Mistral client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/mistral.py
def __init__(
    self, client: "MistralClient", model_name: Optional[str] = None
):
    """
    Parameters
    ----------
    client : MistralClient
        A mistralai.Mistral client instance.
    model_name : Optional[str]
        The name of the model to use.

    """
    # Adapter that translates high-level inputs/outputs into Mistral API
    # arguments.
    self.type_adapter = MistralTypeAdapter()
    self.client = client
    self.model_name = model_name

generate(model_input, output_type=None, **inference_kwargs) async

Generate a response from the model asynchronously.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt or chat messages to generate a response from.

required
output_type Optional[Any]

The desired format of the response (e.g., JSON schema).

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The response generated by the model as text.

Source code in outlines/models/mistral.py
async def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate a response from the model asynchronously.

    Parameters
    ----------
    model_input : Union[Chat, list, str]
        The prompt or chat messages to generate a response from.
    output_type : Optional[Any]
        The desired format of the response (e.g., JSON schema).
    **inference_kwargs : Any
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The response generated by the model as text. A list is returned
        when the API returns several choices.

    Raises
    ------
    TypeError
        If the schema derived from `output_type` is rejected by Mistral.
    RuntimeError
        For any other error raised by the Mistral API.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        result = await self.client.chat.complete_async(
            messages=messages,
            response_format=response_format,
            stream=False,
            **inference_kwargs,
        )
    except Exception as e:
        # "schema" is a substring of "json_schema", so a single test covers
        # both error messages; chain with `from e` so the original cause is
        # preserved (consistent with the branch below).
        if "schema" in str(e).lower():
            raise TypeError(
                f"Mistral does not support your schema: {e}. "
                "Try a local model or dottxt instead."
            ) from e
        else:
            raise RuntimeError(f"Mistral API error: {e}") from e

    outputs = [choice.message for choice in result.choices]
    contents = [message.content for message in outputs]

    # Unwrap the single-choice case for convenience.
    if len(contents) == 1:
        return contents[0]
    return contents

generate_stream(model_input, output_type=None, **inference_kwargs) async

Generate text from the model as an async stream of chunks.

Parameters:

Name Type Description Default
model_input

str, list, or chat input to generate from.

required
output_type

Optional type for structured output.

None
**inference_kwargs

Extra kwargs like "model" name.

{}

Yields:

Type Description
str

Chunks of text as they are streamed.

Source code in outlines/models/mistral.py
async def generate_stream(
    self,
    model_input,
    output_type=None,
    **inference_kwargs,
):
    """Generate text from the model as an async stream of chunks.

    Parameters
    ----------
    model_input
        str, list, or chat input to generate from.
    output_type
        Optional type for structured output.
    **inference_kwargs
        Extra kwargs like "model" name.

    Yields
    ------
    str
        Chunks of text as they are streamed.

    Raises
    ------
    TypeError
        If the schema derived from `output_type` is rejected by Mistral.
    RuntimeError
        For any other error raised by the Mistral API.

    """
    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        response = await self.client.chat.stream_async(
            messages=messages,
            response_format=response_format,
            **inference_kwargs
        )
    except Exception as e:
        # "schema" is a substring of "json_schema", so a single test covers
        # both error messages; `from e` preserves the original cause
        # (consistent with the branch below).
        if "schema" in str(e).lower():
            raise TypeError(
                f"Mistral does not support your schema: {e}. "
                "Try a local model or dottxt instead."
            ) from e
        else:
            raise RuntimeError(f"Mistral API error: {e}") from e

    async for chunk in response:
        # Only yield chunks that actually carry new text content.
        if (
            hasattr(chunk, "data")
            and chunk.data.choices
            and len(chunk.data.choices) > 0
            and hasattr(chunk.data.choices[0], "delta")
            and chunk.data.choices[0].delta.content is not None
        ):
            yield chunk.data.choices[0].delta.content

AsyncOllama

Bases: AsyncModel

Thin wrapper around the ollama.AsyncClient client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the ollama.AsyncClient client.

Source code in outlines/models/ollama.py
class AsyncOllama(AsyncModel):
    """Thin wrapper around the `ollama.AsyncClient` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `ollama.AsyncClient` client.

    """

    def __init__(
        self, client: "AsyncClient", model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `ollama.AsyncClient` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        # Adapter that translates high-level inputs/outputs into ollama
        # arguments.
        self.type_adapter = OllamaTypeAdapter()

    async def generate(
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        # An explicit "model" kwarg wins over the instance default.
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        formatted_messages = self.type_adapter.format_input(model_input)
        formatted_type = self.type_adapter.format_output_type(output_type)
        response = await self.client.chat(
            messages=formatted_messages,
            format=formatted_type,
            **kwargs,
        )
        return response.message.content

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **kwargs,
    ):
        raise NotImplementedError(
            "The `ollama` library does not support batch inference."
        )

    async def generate_stream(  # type: ignore
        self,
        model_input: Chat | str | list,
        output_type: Optional[Any] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using Ollama.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema.
        **kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        # An explicit "model" kwarg wins over the instance default.
        if "model" not in kwargs and self.model_name is not None:
            kwargs["model"] = self.model_name

        formatted_messages = self.type_adapter.format_input(model_input)
        formatted_type = self.type_adapter.format_output_type(output_type)
        stream = await self.client.chat(
            messages=formatted_messages,
            format=formatted_type,
            stream=True,
            **kwargs,
        )
        async for chunk in stream:
            yield chunk.message.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client AsyncClient

The ollama.Client client.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/ollama.py
def __init__(
    self, client: "AsyncClient", model_name: Optional[str] = None,
):
    """
    Parameters
    ----------
    client
        The `ollama.AsyncClient` client.
    model_name
        The name of the model to use.

    """
    # Adapter that translates high-level inputs/outputs into ollama
    # arguments.
    self.type_adapter = OllamaTypeAdapter()
    self.client = client
    self.model_name = model_name

generate(model_input, output_type=None, **kwargs) async

Generate text using Ollama.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/ollama.py
async def generate(
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """Generate text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    # An explicit "model" kwarg wins over the instance default.
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    formatted_messages = self.type_adapter.format_input(model_input)
    formatted_type = self.type_adapter.format_output_type(output_type)
    response = await self.client.chat(
        messages=formatted_messages,
        format=formatted_type,
        **kwargs,
    )
    return response.message.content

generate_stream(model_input, output_type=None, **kwargs) async

Stream text using Ollama.

Parameters:

Name Type Description Default
model_input Chat | str | list

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema.

None
**kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/ollama.py
async def generate_stream(  # type: ignore
    self,
    model_input: Chat | str | list,
    output_type: Optional[Any] = None,
    **kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using Ollama.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema.
    **kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    # An explicit "model" kwarg wins over the instance default.
    if "model" not in kwargs and self.model_name is not None:
        kwargs["model"] = self.model_name

    formatted_messages = self.type_adapter.format_input(model_input)
    formatted_type = self.type_adapter.format_output_type(output_type)
    stream = await self.client.chat(
        messages=formatted_messages,
        format=formatted_type,
        stream=True,
        **kwargs,
    )
    async for chunk in stream:
        yield chunk.message.content

AsyncOpenAI

Bases: AsyncModel

Thin wrapper around the openai.AsyncOpenAI client.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.AsyncOpenAI client.

Source code in outlines/models/openai.py
class AsyncOpenAI(AsyncModel):
    """Thin wrapper around the `openai.AsyncOpenAI` client.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.AsyncOpenAI` client.

    """

    def __init__(
        self,
        client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"],
        model_name: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        client
            The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = OpenAITypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        Raises
        ------
        TypeError
            If OpenAI rejects the schema derived from `output_type`.
        ValueError
            If the model refuses to answer the request.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            result = await self.client.chat.completions.create(
                messages=messages,
                **response_format,
                **inference_kwargs,
            )
        except openai.BadRequestError as e:
            # `body` may be None or lack a "message" key; only translate the
            # error when we can positively identify a schema rejection.
            body = e.body if isinstance(e.body, dict) else {}
            if str(body.get("message", "")).startswith("Invalid schema"):
                raise TypeError(
                    f"OpenAI does not support your schema: {body['message']}. "
                    "Try a local model or dottxt instead."
                ) from e
            # Bare `raise` preserves the original traceback.
            raise

        messages = [choice.message for choice in result.choices]
        for message in messages:
            if message.refusal is not None:
                raise ValueError(
                    f"OpenAI refused to answer the request: {message.refusal}"
                )

        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type=None,
        **inference_kwargs,
    ):
        raise NotImplementedError(
            "The `openai` library does not support batch inference."
        )

    async def generate_stream(  # type: ignore
        self,
        model_input: Union[Chat, list, str],
        output_type: Optional[Union[type[BaseModel], str]] = None,
        **inference_kwargs,
    ) -> AsyncIterator[str]:
        """Stream text using OpenAI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. The
            output type must be of a type that can be converted to a JSON
            schema or an empty dictionary.
        **inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        Raises
        ------
        TypeError
            If OpenAI rejects the schema derived from `output_type`.

        """
        import openai

        messages = self.type_adapter.format_input(model_input)
        response_format = self.type_adapter.format_output_type(output_type)

        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        try:
            stream = await self.client.chat.completions.create(
                stream=True,
                messages=messages,
                **response_format,
                **inference_kwargs
            )
        except openai.BadRequestError as e:
            # Same guarded error translation as `generate`.
            body = e.body if isinstance(e.body, dict) else {}
            if str(body.get("message", "")).startswith("Invalid schema"):
                raise TypeError(
                    f"OpenAI does not support your schema: {body['message']}. "
                    "Try a local model or dottxt instead."
                ) from e
            raise

        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client Union[AsyncOpenAI, AsyncAzureOpenAI]

The openai.AsyncOpenAI or openai.AsyncAzureOpenAI client.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/openai.py
def __init__(
    self,
    client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"],
    model_name: Optional[str] = None,
):
    """Initialize the async OpenAI model wrapper.

    Parameters
    ----------
    client
        The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.
    model_name
        The name of the model to use.

    """
    # The type adapter translates Outlines inputs/output types into
    # arguments understood by the OpenAI client.
    self.type_adapter = OpenAITypeAdapter()
    self.client = client
    self.model_name = model_name

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using OpenAI.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Union[type[BaseModel], str]]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.

None
**inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/openai.py
async def generate(
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model. A list is returned when the
        response contains several choices.

    Raises
    ------
    TypeError
        If OpenAI rejects the JSON schema derived from `output_type`.
    ValueError
        If the model refuses to answer the request.

    """
    # Imported lazily so loading this module does not require `openai`.
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    # Fall back to the wrapper's default model unless the caller
    # explicitly provided one.
    if self.model_name is not None:
        inference_kwargs.setdefault("model", self.model_name)

    try:
        result = await self.client.chat.completions.create(
            messages=messages,
            **response_format,
            **inference_kwargs,
        )
    except openai.BadRequestError as e:
        if e.body["message"].startswith("Invalid schema"):
            raise TypeError(
                f"OpenAI does not support your schema: {e.body['message']}. "
                "Try a local model or dottxt instead."
            )
        else:
            raise e

    # Collect choice contents, surfacing any refusal as an error.
    contents = []
    for choice in result.choices:
        message = choice.message
        if message.refusal is not None:
            raise ValueError(
                f"OpenAI refused to answer the request: {message.refusal}"
            )
        contents.append(message.content)

    if len(contents) == 1:
        return contents[0]
    return contents

generate_stream(model_input, output_type=None, **inference_kwargs) async

Stream text using OpenAI.

Parameters:

Name Type Description Default
model_input Union[Chat, list, str]

The prompt based on which the model will generate a response.

required
output_type Optional[Union[type[BaseModel], str]]

The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary.

None
**inference_kwargs

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/openai.py
async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, list, str],
    output_type: Optional[Union[type[BaseModel], str]] = None,
    **inference_kwargs,
) -> AsyncIterator[str]:
    """Stream text using OpenAI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. The
        output type must be of a type that can be converted to a JSON
        schema or an empty dictionary.
    **inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    Raises
    ------
    TypeError
        If OpenAI rejects the JSON schema derived from `output_type`.

    """
    # Imported lazily so loading this module does not require `openai`.
    import openai

    messages = self.type_adapter.format_input(model_input)
    response_format = self.type_adapter.format_output_type(output_type)

    # Fall back to the wrapper's default model unless the caller set one.
    if "model" not in inference_kwargs and self.model_name is not None:
        inference_kwargs["model"] = self.model_name

    try:
        stream = await self.client.chat.completions.create(
            stream=True,
            messages=messages,
            **response_format,
            **inference_kwargs
        )
    except openai.BadRequestError as e:
        if e.body["message"].startswith("Invalid schema"):
            raise TypeError(
                f"OpenAI does not support your schema: {e.body['message']}. "
                "Try a local model or dottxt instead."
            )
        else:
            raise e

    # Only yield chunks that actually carry content (some chunks only
    # carry metadata such as the finish reason).
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

AsyncSGLang

Bases: AsyncModel

Thin async wrapper around the openai.OpenAI client used to communicate with an SGLang server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the openai.OpenAI client for the SGLang server.

Source code in outlines/models/sglang.py
class AsyncSGLang(AsyncModel):
    """Thin async wrapper around the `openai.OpenAI` client used to communicate
    with an SGLang server.

    This wrapper is used to convert the input and output types specified by the
    users at a higher level to arguments to the `openai.OpenAI` client for the
    SGLang server.

    """

    def __init__(self, client, model_name: Optional[str] = None):
        """
        Parameters
        ----------
        client
            An `openai.AsyncOpenAI` client instance.
        model_name
            The name of the model to use.

        """
        self.client = client
        self.model_name = model_name
        self.type_adapter = SGLangTypeAdapter()

    async def generate(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> Union[str, list[str]]:
        """Generate text using `sglang`.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        Union[str, list[str]]
            The text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        response = await self.client.chat.completions.create(**client_args)

        # A refusal from the server is surfaced as an error rather than
        # silently returned as content.
        messages = [choice.message for choice in response.choices]
        for message in messages:
            if message.refusal is not None:  # pragma: no cover
                raise ValueError(
                    f"The sglang server refused to answer the request: "
                    f"{message.refusal}"
                )

        # Unwrap single-choice responses to a plain string.
        if len(messages) == 1:
            return messages[0].content
        else:
            return [message.content for message in messages]

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Not supported by SGLang; always raises `NotImplementedError`."""
        raise NotImplementedError(
            "SGLang does not support batch inference."
        )

    async def generate_stream( # type: ignore
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Return a text generator.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types available in Outlines are supported provided your
            server uses a structured generation backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        client_args = self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )

        stream = await self.client.chat.completions.create(
            **client_args,
            stream=True,
        )

        # Only yield chunks that actually carry content.
        async for chunk in stream:  # pragma: no cover
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _build_client_args(
        self,
        model_input: Union[Chat, str, list],
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the SGLang client."""
        messages = self.type_adapter.format_input(model_input)
        output_type_args = self.type_adapter.format_output_type(output_type)
        # Output-type arguments take precedence over user-supplied kwargs.
        inference_kwargs.update(output_type_args)

        # Fall back to the wrapper's default model unless the caller set one.
        if "model" not in inference_kwargs and self.model_name is not None:
            inference_kwargs["model"] = self.model_name

        client_args = {
            "messages": messages,
            **inference_kwargs,
        }

        return client_args

__init__(client, model_name=None)

Parameters:

Name Type Description Default
client

An openai.AsyncOpenAI client instance.

required
model_name Optional[str]

The name of the model to use.

None
Source code in outlines/models/sglang.py
def __init__(self, client, model_name: Optional[str] = None):
    """
    Parameters
    ----------
    client
        An `openai.AsyncOpenAI` client instance.
    model_name
        The name of the model to use.

    """
    self.client = client
    self.model_name = model_name
    # Translates Outlines input/output types into SGLang client arguments.
    self.type_adapter = SGLangTypeAdapter()

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using sglang.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
Union[str, list[str]]

The text generated by the model.

Source code in outlines/models/sglang.py
async def generate(
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, list[str]]:
    """Generate text using `sglang`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    Union[str, list[str]]
        The text generated by the model. A list is returned when the
        response contains several choices.

    """
    client_args = self._build_client_args(
        model_input, output_type, **inference_kwargs,
    )

    response = await self.client.chat.completions.create(**client_args)

    # Collect the content of each choice, surfacing refusals as errors.
    contents = []
    for choice in response.choices:
        message = choice.message
        if message.refusal is not None:  # pragma: no cover
            raise ValueError(
                f"The sglang server refused to answer the request: "
                f"{message.refusal}"
            )
        contents.append(message.content)

    return contents[0] if len(contents) == 1 else contents

generate_stream(model_input, output_type=None, **inference_kwargs) async

Return a text generator.

Parameters:

Name Type Description Default
model_input Union[Chat, str, list]

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/sglang.py
async def generate_stream( # type: ignore
    self,
    model_input: Union[Chat, str, list],
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Return a text generator.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types available in Outlines are supported provided your
        server uses a structured generation backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    AsyncIterator[str]
        An async iterator that yields the text generated by the model.

    """
    stream = await self.client.chat.completions.create(
        **self._build_client_args(
            model_input, output_type, **inference_kwargs,
        ),
        stream=True,
    )

    # Only forward chunks that actually carry content.
    async for chunk in stream:  # pragma: no cover
        choices = chunk.choices
        if choices and choices[0].delta.content is not None:
            yield choices[0].delta.content

AsyncTGI

Bases: AsyncModel

Thin async wrapper around a huggingface_hub.AsyncInferenceClient client used to communicate with a TGI server.

This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the huggingface_hub.AsyncInferenceClient client.

Source code in outlines/models/tgi.py
class AsyncTGI(AsyncModel):
    """Async adapter between Outlines and a `TGI` server.

    Translates the input and output types specified by the user at a higher
    level into arguments for a `huggingface_hub.AsyncInferenceClient`
    instance.

    """

    def __init__(self, client):
        """
        Parameters
        ----------
        client
            A huggingface `AsyncInferenceClient` client instance.

        """
        # The type adapter converts Outlines types into TGI client arguments.
        self.type_adapter = TGITypeAdapter()
        self.client = client

    async def generate(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> str:
        """Generate text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        str
            The text generated by the model.

        """
        return await self.client.text_generation(
            **self._build_client_args(
                model_input, output_type, **inference_kwargs,
            )
        )

    async def generate_batch(
        self,
        model_input,
        output_type = None,
        **inference_kwargs,
    ):
        """Not supported by TGI; always raises `NotImplementedError`."""
        raise NotImplementedError("TGI does not support batch inference.")

    async def generate_stream( # type: ignore
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream text using TGI.

        Parameters
        ----------
        model_input
            The prompt based on which the model will generate a response.
        output_type
            The desired format of the response generated by the model. All
            output types except `CFG` are supported provided your server uses
            a backend that supports them.
        inference_kwargs
            Additional keyword arguments to pass to the client.

        Returns
        -------
        AsyncIterator[str]
            An async iterator that yields the text generated by the model.

        """
        stream = await self.client.text_generation(
            **self._build_client_args(
                model_input, output_type, **inference_kwargs,
            ),
            stream=True,
        )

        async for chunk in stream:  # pragma: no cover
            yield chunk

    def _build_client_args(
        self,
        model_input: str,
        output_type: Optional[Any] = None,
        **inference_kwargs: Any,
    ) -> dict:
        """Build the arguments to pass to the TGI client."""
        # Output-type arguments take precedence over user-supplied kwargs;
        # the formatted prompt is merged in under the "prompt" key.
        merged = {
            **inference_kwargs,
            **self.type_adapter.format_output_type(output_type),
        }
        return {
            "prompt": self.type_adapter.format_input(model_input),
            **merged,
        }

__init__(client)

Parameters:

Name Type Description Default
client

A huggingface AsyncInferenceClient client instance.

required
Source code in outlines/models/tgi.py
def __init__(self, client):
    """
    Parameters
    ----------
    client
        A huggingface `AsyncInferenceClient` client instance.

    """
    # The type adapter converts Outlines types into TGI client arguments.
    self.type_adapter = TGITypeAdapter()
    self.client = client

generate(model_input, output_type=None, **inference_kwargs) async

Generate text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
str

The text generated by the model.

Source code in outlines/models/tgi.py
async def generate(
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses
        a backend that supports them.
    inference_kwargs
        Additional keyword arguments to pass to the client.

    Returns
    -------
    str
        The text generated by the model.

    """
    return await self.client.text_generation(
        **self._build_client_args(
            model_input, output_type, **inference_kwargs,
        )
    )

generate_stream(model_input, output_type=None, **inference_kwargs) async

Stream text using TGI.

Parameters:

Name Type Description Default
model_input str

The prompt based on which the model will generate a response.

required
output_type Optional[Any]

The desired format of the response generated by the model. All output types except CFG are supported provided your server uses a backend that supports them.

None
inference_kwargs Any

Additional keyword arguments to pass to the client.

{}

Returns:

Type Description
AsyncIterator[str]

An async iterator that yields the text generated by the model.

Source code in outlines/models/tgi.py
async def generate_stream( # type: ignore
    self,
    model_input: str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> AsyncIterator[str]:
    """Stream text using TGI.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The desired format of the response generated by the model. All
        output types except `CFG` are supported provided your server uses