Improve docstrings and type hinting #1588

Closed
12 changes: 6 additions & 6 deletions docs/reference/models/sglang.md
@@ -1,12 +1,12 @@
# SgLang
# SGLang

## Prerequisites

The Outlines `SgLang` model is inteded to be used along with a separate sglang server (running either locally or remotely). Make sure you have an sglang server running before using the `SgLang` model. As the sglang client relies on the `openai` python sdk, you need to have the `openai` package installed.
The Outlines `SGLang` model is intended to be used along with a separate sglang server (running either locally or remotely). Make sure you have an sglang server running before using the `SGLang` model. As the sglang client relies on the `openai` Python SDK, you need to have the `openai` package installed.
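
A quick way to verify both prerequisites, as a minimal sketch: the base URL below is illustrative (sglang commonly serves its OpenAI-compatible API on port 30000), and any placeholder API key works for a local server.

```python
import openai

# Illustrative base URL; point this at your running sglang server.
client = openai.OpenAI(base_url="http://localhost:30000/v1", api_key="not-needed")
print(client.models.list())  # should list the model served by sglang
```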

## Initialize the model

To load the model, you can use the `from_sglang` function. The argument of the function is either an `OpenAI` or `AsyncOpenAI` instance from the `openai` library. Based on whether the `openai` instance is synchronous or asynchronous, you will receive a `SgLang` or `AsyncSgLang` model instance.
To load the model, you can use the `from_sglang` function. The argument of the function is either an `OpenAI` or `AsyncOpenAI` instance from the `openai` library. Based on whether the `openai` instance is synchronous or asynchronous, you will receive a `SGLang` or `AsyncSGLang` model instance.

```python
import openai
@@ -16,10 +16,10 @@ sync_openai_client = openai.OpenAI(base_url="...")
async_openai_client = openai.AsyncOpenAI(base_url="...")

sync_model = outlines.from_sglang(sync_openai_client, "qwen/qwen2.5-0.5b-instruct")
print(type(sync_model)) # <class 'outlines.models.sglang.SgLang'>
print(type(sync_model)) # <class 'outlines.models.sglang.SGLang'>

async_model = outlines.from_sglang(async_openai_client, "qwen/qwen2.5-0.5b-instruct")
print(type(async_model)) # <class 'outlines.models.sglang.AsyncSgLang'>
print(type(async_model)) # <class 'outlines.models.sglang.AsyncSGLang'>
```

## Generate text
@@ -36,7 +36,7 @@ answer = sync_model("Create a character.", output_type=Character)
answer = await async_model("Create a character.", output_type=Character)
```

The `SgLang` model supports also supports streaming.
The `SGLang` model also supports streaming.

```python
for chunk in sync_model.stream("Write a short story about a cat.", max_tokens=100):
    print(chunk, end="")
```
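
The async model exposes the same interface; a minimal sketch of async streaming, assuming the `async_model` created above:

```python
import asyncio

async def main():
    # Each chunk is a piece of the generated text, as in the sync case
    async for chunk in async_model.stream(
        "Write a short story about a cat.", max_tokens=100
    ):
        print(chunk, end="")

asyncio.run(main())
```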
8 changes: 4 additions & 4 deletions docs_new/getting_started/index.md
@@ -265,10 +265,10 @@ Outlines wraps around a variety of LLM inference backends, described in the [ins
response = model("Create a character.", Person) # { "name": "John", "age": 30 }
```

=== "SgLang"
=== "SGLang"

```python
# SgLang
# SGLang

import outlines
from openai import OpenAI
@@ -280,8 +280,8 @@ Outlines wraps around a variety of LLM inference backends, described in the [ins
name: str
age: int

# You must have a separete SgLang server running
# Create an OpenAI client with the base URL of the SgLang server
# You must have a separate SGLang server running
# Create an OpenAI client with the base URL of the SGLang server
openai_client = OpenAI(base_url="http://localhost:11434/v1")

# Create an Outlines model
model = outlines.from_sglang(openai_client, "qwen/qwen2.5-0.5b-instruct")

# Generate structured output
response = model("Create a character.", Person)  # { "name": "John", "age": 30 }
```
3 changes: 3 additions & 0 deletions docs_new/reference/index.md
@@ -0,0 +1,3 @@
# Reference

This needs to contain a list of all the misc API reference pages.
55 changes: 46 additions & 9 deletions outlines/applications.py
@@ -1,8 +1,15 @@
"""Encapsulate a prompt template and an output type into a reusable object."""

from typing import Any, Callable, Dict, Optional, Union

from outlines.generator import BlackBoxGenerator, Generator, SteerableGenerator
from outlines.generator import (
BlackBoxGenerator,
Generator,
SteerableGenerator,
AsyncBlackBoxGenerator,
)
from outlines.models.base import Model
from outlines.templates import Template
from outlines.models import BlackBoxModel, SteerableModel


class Application:
@@ -32,8 +39,8 @@ class OutputModel(BaseModel):
result: int

model = models.from_transformers(
AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"),
AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
AutoModelForCausalLM.from_pretrained("microsoft/phi-3-mini-4k-instruct"),
AutoTokenizer.from_pretrained("microsoft/phi-3-mini-4k-instruct")
)

template_string = "What is 2 times {{ num }}?"
@@ -44,19 +51,49 @@ class OutputModel(BaseModel):
result = application(model, {"num": 3}, max_new_tokens=20)
print(result) # Expected output: { "result" : 6 }
```

"""
def __init__(self, template: Union[Template, Callable], output_type: Any):
def __init__(
self,
template: Union[Template, Callable],
output_type: Optional[Any] = None,
):
"""
Parameters
----------
template
The template to use to build the prompt.
output_type
The output type provided to the generator.
"""
self.template = template
self.output_type = output_type
self.model: Optional[Union[BlackBoxModel, SteerableModel]] = None
self.generator: Optional[Union[BlackBoxGenerator, SteerableGenerator]] = None
self.generator: Optional[Union[
BlackBoxGenerator, SteerableGenerator, AsyncBlackBoxGenerator
]] = None
self.model: Optional[Model] = None

def __call__(
self,
model: Union[BlackBoxModel, SteerableModel],
model: Model,
template_vars: Dict[str, Any],
**inference_kwargs
):
) -> Any:
"""
Parameters
----------
model
The model to use to generate the response.
template_vars
The variables to be substituted in the template.
**inference_kwargs
Additional keyword arguments to pass to the model.

Returns
-------
Any
The generated response.
"""
if model is None:
raise ValueError("you must provide a model")
# We save the generator to avoid creating a new one for each call.
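
Since `output_type` is now optional, an `Application` can also be used for unconstrained text generation. A minimal sketch, reusing the `model` from the docstring example above and assuming `Template.from_string` from `outlines.templates`:

```python
from outlines.applications import Application
from outlines.templates import Template

# No output_type: the underlying generator returns free-form text
application = Application(
    Template.from_string("Describe {{ thing }} in one sentence.")
)

result = application(model, {"thing": "a cat"}, max_new_tokens=30)
print(result)  # unconstrained text, no schema enforced
```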
2 changes: 2 additions & 0 deletions outlines/caching.py
@@ -1,3 +1,5 @@
"""Caching and memoization of function calls."""

import asyncio
import contextlib
import functools
1 change: 1 addition & 0 deletions outlines/fsm/__init__.py
@@ -0,0 +1 @@
"""Finite state machines builder for CFG generation."""
2 changes: 2 additions & 0 deletions outlines/fsm/parsing.py
@@ -1,3 +1,5 @@
"""Build a parser from a grammar to create a finite state machine."""

from copy import copy, deepcopy
from dataclasses import dataclass
from functools import lru_cache
56 changes: 15 additions & 41 deletions outlines/generator.py
@@ -12,6 +12,7 @@
AsyncBlackBoxModel,
BlackBoxModel,
SteerableModel,
SyncBlackBoxModel,
)
from outlines.models.base import AsyncModel, Model
from outlines.processors import (
@@ -28,34 +29,27 @@
class BlackBoxGenerator:
"""Synchronous generator for which we don't control constrained
generation.

The output type provided is not compiled into a logits processor, but is
instead directly passed on to the model.

"""
output_type: Optional[Any]

def __init__(self, model: BlackBoxModel, output_type: Optional[Any]):
def __init__(self, model: SyncBlackBoxModel, output_type: Optional[Any]):
"""
Parameters
----------
model
An instance of an Outlines model.
output_type
The output type that will be used to constrain the generation.

The output type expressed as a Python type
"""
self.model = model
self.output_type = output_type

if isinstance(self.output_type, FSM):
raise NotImplementedError(
"FSM generation is not supported for API-based models"
)

def __call__(self, prompt: Any, **inference_kwargs) -> Any:
"""Generate a response from the model.

"""
Parameters
----------
prompt
@@ -67,15 +61,13 @@ def __call__(self, prompt: Any, **inference_kwargs) -> Any:
-------
Any
The response generated by the model.

"""
return self.model.generate(
prompt, self.output_type, **inference_kwargs
)

def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
"""Generate a stream of responses from the model.

"""
Parameters
----------
prompt
@@ -87,7 +79,6 @@ def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
-------
Any
The response generated by the model.

"""
return self.model.generate_stream(
prompt, self.output_type, **inference_kwargs
@@ -97,10 +88,6 @@ def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
class AsyncBlackBoxGenerator:
"""Asynchronous generator for which we don't control constrained
generation.

The output type provided is not compiled into a logits processor, but is
instead directly passed on to the model.

"""
output_type: Optional[Any]

@@ -111,20 +98,17 @@ def __init__(self, model: AsyncBlackBoxModel, output_type: Optional[Any]):
model
An instance of an Outlines model.
output_type
The output type that will be used to constrain the generation.

The output type expressed as a Python type
"""
self.model = model
self.output_type = output_type

if isinstance(self.output_type, FSM):
raise NotImplementedError(
"FSM generation is not supported for API-based models"
)

async def __call__(self, prompt: Any, **inference_kwargs) -> Any:
"""Generate a response from the model.

"""
Parameters
----------
prompt
@@ -136,7 +120,6 @@ async def __call__(self, prompt: Any, **inference_kwargs) -> Any:
-------
Any
The response generated by the model.

"""
return await self.model.generate(
prompt, self.output_type, **inference_kwargs
@@ -145,8 +128,7 @@ async def __call__(self, prompt: Any, **inference_kwargs) -> Any:
async def stream(
self, prompt: Any, **inference_kwargs
) -> AsyncIterator[Any]:
"""Generate a stream of responses from the model.

"""
Parameters
----------
prompt
Expand All @@ -158,7 +140,6 @@ async def stream(
-------
Any
The response generated by the model.

"""
async for chunk in self.model.generate_stream( # pragma: no cover
prompt, self.output_type, **inference_kwargs
@@ -193,7 +174,6 @@ def __init__(self, model: SteerableModel, output_type: Optional[Any]):
An instance of an Outlines model.
output_type
The output type expressed as a Python type

"""
self.model = model
if output_type is None:
@@ -227,15 +207,13 @@ def __init__(self, model: SteerableModel, output_type: Optional[Any]):
def from_processor(
cls, model: SteerableModel, processor: OutlinesLogitsProcessor
):
"""Create a generator from a logits processor.

"""
Parameters
----------
model
An instance of an Outlines model.
processor
An instance of an OutlinesLogitsProcessor.

"""
if not isinstance(processor, OutlinesLogitsProcessor):
raise TypeError(
@@ -249,8 +227,7 @@ def from_processor(
return instance

def __call__(self, prompt: Any, **inference_kwargs) -> Any:
"""Generate a response from the model.

"""
Parameters
----------
prompt
@@ -262,15 +239,13 @@ def __call__(self, prompt: Any, **inference_kwargs) -> Any:
-------
Any
The response generated by the model.

"""
return self.model.generate(
prompt, self.logits_processor, **inference_kwargs
)

def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
"""Generate a stream of responses from the model.

"""
Parameters
----------
prompt
@@ -282,7 +257,6 @@ def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
-------
Any
The response generated by the model.

"""
return self.model.generate_stream(
prompt, self.logits_processor, **inference_kwargs
@@ -295,7 +269,8 @@ def Generator(
*,
processor: Optional[OutlinesLogitsProcessor] = None,
) -> Union[SteerableGenerator, BlackBoxGenerator, AsyncBlackBoxGenerator]:
"""Create a generator for the given model and output parameters.
"""
Create a generator for the given model and output parameters.

The 2 parameters output_type and processor are mutually exclusive. The
parameters processor is only supported for SteerableModel instances
@@ -315,7 +290,6 @@ def Generator(
-------
Union[SteerableGenerator, BlackBoxGenerator, AsyncBlackBoxGenerator]
A generator instance.

"""
provided_output_params = sum(
param is not None
@@ -338,10 +312,10 @@
)
if isinstance(model, AsyncBlackBoxModel): # type: ignore
return AsyncBlackBoxGenerator(model, output_type) # type: ignore
elif isinstance(model, BlackBoxModel): # type: ignore
elif isinstance(model, SyncBlackBoxModel): # type: ignore
return BlackBoxGenerator(model, output_type) # type: ignore
else:
raise ValueError(
"The model argument must be an instance of "
"SteerableModel, BlackBoxModel or AsyncBlackBoxModel"
"SteerableModel or BlackBoxModel"
)