From c48f260a23bd0ea65614638f4bb56958384016a9 Mon Sep 17 00:00:00 2001
From: Cameron Pfiffer
Date: Fri, 10 Jan 2025 14:03:02 -0800
Subject: [PATCH 1/7] Update README to include chat templating

The existing README examples produce underwhelming or incorrect results
(Example is underwhelming, #1347) due to a lack of chat templating for
instruct models. This adds special tokens to each instruct model call, as
well as comments on how to obtain/produce special tokens.
---
 README.md | 95 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 78 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index c7f071503..a17ffc01d 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,15 @@ is to ensure that there is a well-defined interface between their output and
 user-defined code. **Outlines** provides ways to control the generation of
 language models to make their output more predictable.
 
+The following methods of structured generation are supported:
+
+- [Multiple choices](#multiple-choices)
+- [Type constraints](#type-constraints)
+- [Efficient regex-structured generation](#efficient-regex-structured-generation)
+- [Efficient JSON generation following a Pydantic model](#efficient-json-generation-following-a-pydantic-model)
+- [Using context-free grammars to guide generation](#using-context-free-grammars-to-guide-generation)
+- [Open functions](#open-functions)
+
 ### Multiple choices
 
 You can reduce the completion to a choice between multiple possibilities:
@@ -71,19 +80,32 @@ You can reduce the completion to a choice between multiple possibilities:
 ``` python
 import outlines
 
-model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
+model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
+model = outlines.models.transformers(model_name)
 
-prompt = """You are a sentiment-labelling assistant.
-Is the following review positive or negative?
+# You must apply the chat template tokens to the prompt!
+# See below for an example.
+prompt = """
+<|im_start|>system
+You extract information from text.
+<|im_end|>
 
-Review: This restaurant is just awesome!
+<|im_start|>user
+What food does the following text describe?
+
+Text: I really really really want pizza.
+<|im_end|>
+<|im_start|>assistant
 """
 
-generator = outlines.generate.choice(model, ["Positive", "Negative"])
+generator = outlines.generate.choice(model, ["Pizza", "Pasta", "Salad", "Dessert"])
 answer = generator(prompt)
+print(f'{answer=}')
+
+# answer=Pizza
 ```
 
-### Type constraint
+### Type constraints
 
 You can instruct the model to only return integers or floats:
 
@@ -116,7 +138,17 @@ import outlines
 
 model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
 
-prompt = "What is the IP address of the Google DNS servers? "
+prompt = """
+<|im_start|>system You are a helpful assistant.
+<|im_end|>
+
+<|im_start|>user
+What is an IP address of the Google DNS servers? 
+<|im_end|>
+<|im_start|>assistant
+The IP address of a Google DNS server is 
+
+"""
 generator = outlines.generate.text(model)
 unstructured = generator(prompt, max_tokens=30)
@@ -124,19 +156,17 @@ unstructured = generator(prompt, max_tokens=30)
 generator = outlines.generate.regex(
     model,
     r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)",
+    sampler=outlines.samplers.greedy(),
 )
 structured = generator(prompt, max_tokens=30)
 
 print(unstructured)
-# What is the IP address of the Google DNS servers?
-#
-# Passive DNS servers are at DNS servers that are private.
-# In other words, both IP servers are private. 
The database -# does not contain Chelsea Manning +# 8.8.8.8 +# +# <|im_end|> print(structured) -# What is the IP address of the Google DNS servers? -# 2.2.6.1 +# 8.8.8.8 ``` Unlike other libraries, regex-structured generation in Outlines is almost as fast @@ -144,15 +174,13 @@ as non-structured generation. ### Efficient JSON generation following a Pydantic model -Outlines allows to guide the generation process so the output is *guaranteed* to follow a [JSON schema](https://json-schema.org/) or [Pydantic model](https://docs.pydantic.dev/latest/): +Outlines users can guide the generation process so the output is *guaranteed* to follow a [JSON schema](https://json-schema.org/) or [Pydantic model](https://docs.pydantic.dev/latest/): ```python from enum import Enum from pydantic import BaseModel, constr import outlines -import torch - class Weapon(str, Enum): sword = "sword" @@ -335,6 +363,39 @@ prompt = labelling("Just awesome", examples) answer = outlines.generate.text(model)(prompt, max_tokens=100) ``` +### Chat template tokens + +Outlines does not manage chat templating tokens when using instruct models. You must apply the chat template tokens to the prompt yourself. Chat template tokens are not needed for base models. + +You can find the chat template tokens in the model's HuggingFace repo or documentation. As an example, the SmolLM2-360M-Instruct special tokens can be found [here](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct/blob/main/special_tokens_map.json). + +A convenient way to do this is to use the `tokenizer` from the `transformers` library: + +```python +from transformers import AutoTokenizer + +tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct") +prompt = tokenizer.apply_chat_template( + [ + {"role": "system", "content": "You extract information from text."}, + {"role": "user", "content": "What food does the following text describe?"}, + ], + tokenize=False, + add_bos=True, + add_generation_prompt=True, +) +``` + +yields + +``` +<|im_start|>system +You extract information from text.<|im_end|> +<|im_start|>user +What food does the following text describe?<|im_end|> +<|im_start|>assistant +``` + ## Join us - 💡 **Have an idea?** Come chat with us on [Discord][discord] From 0df6215d808dd06bc179e87472c48de7c90d48ec Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Fri, 10 Jan 2025 14:08:47 -0800 Subject: [PATCH 2/7] Trim trailing whitespace of README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6ae6bef16..91e4afedf 100644 --- a/README.md +++ b/README.md @@ -167,10 +167,10 @@ prompt = """ <|im_end|> <|im_start|>user -What is an IP address of the Google DNS servers? +What is an IP address of the Google DNS servers? <|im_end|> <|im_start|>assistant -The IP address of a Google DNS server is +The IP address of a Google DNS server is """ @@ -186,7 +186,7 @@ structured = generator(prompt, max_tokens=30) print(unstructured) # 8.8.8.8 -# +# # <|im_end|> print(structured) From d81674900575dfd5fd90eb212be8d87526729b8e Mon Sep 17 00:00:00 2001 From: Cameron Pfiffer Date: Mon, 13 Jan 2025 11:54:02 -0800 Subject: [PATCH 3/7] Update README.md Co-authored-by: Victoria Terenina --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 91e4afedf..5f49cc616 100644 --- a/README.md +++ b/README.md @@ -101,9 +101,8 @@ Text: I really really really want pizza. 
generator = outlines.generate.choice(model, ["Pizza", "Pasta", "Salad", "Dessert"]) answer = generator(prompt) -print(f'{answer=}') -# answer=Pizza +# Likely answer: Pizza ``` You can also pass these choices through en enum: From 960e723662b2d302e60e159ef19d3d08151caf5c Mon Sep 17 00:00:00 2001 From: cpfiffer Date: Tue, 21 Jan 2025 12:28:26 -0800 Subject: [PATCH 4/7] Add chat_templating.md to the docs nav menu --- mkdocs.yml | 97 +++++++++++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 7a3ca1e98..8d051d1af 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -4,7 +4,6 @@ site_author: The Outlines developers site_description: >- Structured text generation with LLMs - # Repository repo_name: dottxt-ai/outlines repo_url: https://github.com/dottxt-ai/outlines @@ -76,7 +75,6 @@ markdown_extensions: emoji_generator: !!python/name:material.extensions.emoji.to_svg - pymdownx.snippets: - extra_css: - stylesheets/extra.css @@ -131,53 +129,54 @@ nav: - Cerebrium: cookbook/deploy-using-cerebrium.md - Modal: cookbook/deploy-using-modal.md - Docs: - - reference/index.md - - Generation: - - Overview: reference/generation/generation.md - - Text: reference/text.md - - Samplers: reference/samplers.md - - Structured generation: - - How does it work?: reference/generation/structured_generation_explanation.md - - Classification: reference/generation/choices.md - - Regex: reference/generation/regex.md - - Type constraints: reference/generation/format.md - - JSON (function calling): reference/generation/json.md - - Grammar: reference/generation/cfg.md - - Creating Grammars: reference/generation/creating_grammars.md - - Custom FSM operations: reference/generation/custom_fsm_ops.md - - Utilities: - - Serve with vLLM: reference/serve/vllm.md - - Serve with LM Studio: reference/serve/lmstudio.md - - Custom types: reference/generation/types.md - - Prompt templating: reference/prompting.md - - Outlines functions: reference/functions.md - - Models: - - Overview: reference/models/models.md - - Open source: - - Transformers: reference/models/transformers.md - - Transformers Vision: reference/models/transformers_vision.md - - Llama.cpp: reference/models/llamacpp.md - - vLLM: reference/models/vllm.md - - TGI: reference/models/tgi.md - - ExllamaV2: reference/models/exllamav2.md - - MLX: reference/models/mlxlm.md - - Mamba: reference/models/transformers/#mamba - - API: - - OpenAI: reference/models/openai.md + - reference/index.md + - Generation: + - Overview: reference/generation/generation.md + - Chat templating: reference/chat_templating.md + - Text: reference/text.md + - Samplers: reference/samplers.md + - Structured generation: + - How does it work?: reference/generation/structured_generation_explanation.md + - Classification: reference/generation/choices.md + - Regex: reference/generation/regex.md + - Type constraints: reference/generation/format.md + - JSON (function calling): reference/generation/json.md + - Grammar: reference/generation/cfg.md + - Creating Grammars: reference/generation/creating_grammars.md + - Custom FSM operations: reference/generation/custom_fsm_ops.md + - Utilities: + - Serve with vLLM: reference/serve/vllm.md + - Serve with LM Studio: reference/serve/lmstudio.md + - Custom types: reference/generation/types.md + - Prompt templating: reference/prompting.md + - Outlines functions: reference/functions.md + - Models: + - Overview: reference/models/models.md + - Open source: + - Transformers: 
reference/models/transformers.md + - Transformers Vision: reference/models/transformers_vision.md + - Llama.cpp: reference/models/llamacpp.md + - vLLM: reference/models/vllm.md + - TGI: reference/models/tgi.md + - ExllamaV2: reference/models/exllamav2.md + - MLX: reference/models/mlxlm.md + - Mamba: reference/models/transformers/#mamba + - API: + - OpenAI: reference/models/openai.md - API Reference: - - api/index.md - - api/models.md - - api/prompts.md - - api/json_schema.md - - api/guide.md - - api/parsing.md - - api/regex.md - - api/samplers.md + - api/index.md + - api/models.md + - api/prompts.md + - api/json_schema.md + - api/guide.md + - api/parsing.md + - api/regex.md + - api/samplers.md - Community: - - community/index.md - - Feedback 🫶: community/feedback.md - - Chat with us ☕: https://discord.com/invite/R9DSu34mGd - - How to contribute 🏗️: community/contribute.md - - Your projects 👏: community/examples.md - - Versioning Guide 📌: community/versioning.md + - community/index.md + - Feedback 🫶: community/feedback.md + - Chat with us ☕: https://discord.com/invite/R9DSu34mGd + - How to contribute 🏗️: community/contribute.md + - Your projects 👏: community/examples.md + - Versioning Guide 📌: community/versioning.md - Blog: blog/index.md From 1a49501aebe928db01361b1e95898a23b097a6f6 Mon Sep 17 00:00:00 2001 From: cpfiffer Date: Tue, 21 Jan 2025 12:28:42 -0800 Subject: [PATCH 5/7] Add chat templating section to README --- README.md | 39 ++++++--------------------------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 53c60ef85..d87ef35f5 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,12 @@ The following methods of structured generation are supported: - [Using context-free grammars to guide generation](#using-context-free-grammars-to-guide-generation) - [Open functions](#open-functions) +### Chat template tokens + +Outlines does not manage chat templating tokens when using instruct models. You must apply the chat template tokens to the prompt yourself. Chat template tokens are not needed for base models. + +Please see [the documentation](https://dottxt-ai.github.io/outlines/latest/reference/chat_templating) on chat templating for more. + ### Multiple choices You can reduce the completion to a choice between multiple possibilities: @@ -413,39 +419,6 @@ prompt = labelling("Just awesome", examples) answer = outlines.generate.text(model)(prompt, max_tokens=100) ``` -### Chat template tokens - -Outlines does not manage chat templating tokens when using instruct models. You must apply the chat template tokens to the prompt yourself. Chat template tokens are not needed for base models. - -You can find the chat template tokens in the model's HuggingFace repo or documentation. As an example, the SmolLM2-360M-Instruct special tokens can be found [here](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct/blob/main/special_tokens_map.json). 
-
-A convenient way to do this is to use the `tokenizer` from the `transformers` library:
-
-```python
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
-prompt = tokenizer.apply_chat_template(
-    [
-        {"role": "system", "content": "You extract information from text."},
-        {"role": "user", "content": "What food does the following text describe?"},
-    ],
-    tokenize=False,
-    add_bos=True,
-    add_generation_prompt=True,
-)
-```
-
-yields
-
-```
-<|im_start|>system
-You extract information from text.<|im_end|>
-<|im_start|>user
-What food does the following text describe?<|im_end|>
-<|im_start|>assistant
-```
-
 ## Join us
 
 - 💡 **Have an idea?** Come chat with us on [Discord][discord]

From 6b4b107221114c9bc1d697bb88c543a917d542ab Mon Sep 17 00:00:00 2001
From: cpfiffer
Date: Tue, 21 Jan 2025 12:29:20 -0800
Subject: [PATCH 6/7] Add chat templating documentation

---
 docs/reference/chat_templating.md | 38 +++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 docs/reference/chat_templating.md

diff --git a/docs/reference/chat_templating.md b/docs/reference/chat_templating.md
new file mode 100644
index 000000000..e3f33fbfd
--- /dev/null
+++ b/docs/reference/chat_templating.md
@@ -0,0 +1,38 @@
+# Chat templating
+
+Instruction-tuned language models use "special tokens" to indicate different parts of text, such as the system prompt, the user prompt, any images, and the assistant's response. A [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) is how these different types of input are composited together into a single, machine-readable string.
+
+Outlines does not manage chat templating tokens when using instruct models. You must apply the chat template tokens to the prompt yourself. If you do not, instruction-tuned models will often produce nonsensical output.
+
+Chat template tokens are not needed for base models.
+
+You can find the chat template tokens in the model's HuggingFace repo or documentation. As an example, the `SmolLM2-360M-Instruct` special tokens can be found [here](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct/blob/main/special_tokens_map.json).
+
+However, manually looking up a model's special tokens is slow, and special tokens vary from model to model. If you change the model, prompts with hard-coded special tokens may break.
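+
+For instance, you can check which special tokens and which chat template a model uses by inspecting its tokenizer. The sketch below is a minimal illustration; it assumes only that the `transformers` package is installed:
+
+```python
+from transformers import AutoTokenizer
+
+# Load the tokenizer that ships with the instruct model
+tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
+
+# The map of special tokens (bos/eos/etc.) the model was trained with
+print(tokenizer.special_tokens_map)
+
+# The Jinja chat template the tokenizer applies, if the model defines one
+print(tokenizer.chat_template)
+```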
+ +If you need a convenient tool to apply chat templating for you, you should use the `tokenizer` from the `transformers` library: + +```python +from transformers import AutoTokenizer + +tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct") +prompt = tokenizer.apply_chat_template( + [ + {"role": "system", "content": "You extract information from text."}, + {"role": "user", "content": "What food does the following text describe?"}, + ], + tokenize=False, + add_bos=True, + add_generation_prompt=True, +) +``` + +yields + +``` +<|im_start|>system +You extract information from text.<|im_end|> +<|im_start|>user +What food does the following text describe?<|im_end|> +<|im_start|>assistant +``` From 6fa1446179c5b33c258f30961a4c79fa6c6df5b9 Mon Sep 17 00:00:00 2001 From: cpfiffer Date: Wed, 22 Jan 2025 17:42:57 -0800 Subject: [PATCH 7/7] Make multichoice/enum section in README cleaner --- README.md | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index d87ef35f5..b3cde1dd1 100644 --- a/README.md +++ b/README.md @@ -111,27 +111,20 @@ answer = generator(prompt) # Likely answer: Pizza ``` -You can also pass these choices through en enum: +You can also pass in choices with an `Enum`: ````python from enum import Enum -import outlines - -class Sentiment(str, Enum): - positive = "Positive" - negative = "Negative" - -model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct") +class Food(str, Enum): + pizza = "Pizza" + pasta = "Pasta" + salad = "Salad" + dessert = "Dessert" -prompt = """You are a sentiment-labelling assistant. -Is the following review positive or negative? - -Review: This restaurant is just awesome! -""" - -generator = outlines.generate.choice(model, Sentiment) +generator = outlines.generate.choice(model, Food) answer = generator(prompt) +# Likely answer: Pizza ```` ### Type constraints