Skip to content

Commit 7a1614e

Browse files
baberabb, KonradSzafer, and haileyschoelkopf
authored
Chat Template fix (cont. #2235) (#2269)
* default chat template method fix * move chat_template to TemplateLM * remove hotfix * handle openai `chat_template` * Update lm_eval/api/model.py Co-authored-by: Hailey Schoelkopf <[email protected]> * add 'max_tokens' to gen_kwargs * pre-commit --------- Co-authored-by: KonradSzafer <[email protected]> Co-authored-by: Hailey Schoelkopf <[email protected]>
1 parent 928e8bb commit 7a1614e

File tree

6 files changed

+117
-124
lines changed

6 files changed

+117
-124
lines changed

lm_eval/api/model.py

Lines changed: 105 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
import logging
55
import os
6-
from typing import Dict, List, Optional, Tuple, Type, TypeVar
6+
from typing import Dict, List, Optional, Tuple, Type, TypeVar, Union
77

88
import transformers
99
from sqlitedict import SqliteDict
@@ -192,15 +192,13 @@ def tokenizer_name(self) -> str:
192192
"To use this model with chat templates, please implement the 'tokenizer_name' property."
193193
)
194194

195-
@property
196-
def chat_template(self) -> str:
197-
"""Must be defined for LM subclasses that implement Chat Templating.
198-
Should return the structure of the chat template applied to user/assistant messages.
199-
This is used only to save in the experiment results for reproducibility.
195+
def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]:
196+
"""Returns the chat template structure for user/assistant messages if a template is provided.
197+
This method is intended to be overridden in a subclass to define a specific chat template format.
198+
For models that do not support chat templates, this method returns None by default.
200199
"""
201-
raise NotImplementedError(
202-
"To use this model with chat templates, please implement the 'chat_template' property."
203-
)
200+
201+
return ""
204202

205203
def set_cache_hook(self, cache_hook) -> None:
206204
self.cache_hook = cache_hook
@@ -316,6 +314,8 @@ class TemplateLM(LM):
316314
and boilerplate often included in other LM subclasses.
317315
"""
318316

317+
tokenizer = None
318+
319319
@property
320320
@abc.abstractmethod
321321
def eot_token_id(self):
@@ -386,3 +386,99 @@ def loglikelihood_rolling(
386386
@abc.abstractmethod
387387
def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]:
388388
pass
389+
390+
def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]:
391+
"""
392+
Set and get the appropriate chat template for the model.
393+
This method sets the tokenizer's chat_template and returns the template string for reproducibility.
394+
395+
The template selection logic is adapted from the Transformers library's `apply_chat_template`
396+
method in the Tokenizer class. The original implementation can be found at:
397+
https://github.com/huggingface/transformers/blob/fc35907f95459d7a6c5281dfadd680b6f7b620e3/src/transformers/tokenization_utils_base.py#L1687
398+
399+
This method ensures that the right template is chosen based on the following:
400+
0. If the model has no 'tokenizer' attribute: assumes that there is only a single possible chat template, handled on the model provider side internally. Returns the empty string.
401+
1. If the model's tokenizer has multiple templates:
402+
a. Use the specified template if it exists in the dictionary.
403+
b. Use the default template from the list if no specific template is provided.
404+
c. Raise an error if no default template exists and no specific template is provided.
405+
2. If the model's tokenizer has a single template or no template:
406+
a. Use the tokenizer's chat template if available.
407+
b. Fall back to the default chat template if no tokenizer chat template exists.
408+
409+
Args:
410+
chat_template (Union[bool, str]): Specifies the chat template to use.
411+
- If False or None, no template is applied.
412+
- If True, the default or only available template is used.
413+
- If a string, the template with the matching name is used.
414+
415+
Returns:
416+
Optional[str]: The selected chat template, or None if no template is applied.
417+
"""
418+
if self.tokenizer is None:
419+
return ""
420+
421+
if chat_template is False or chat_template is None:
422+
eval_logger.warning(
423+
"model.chat_template was called with the chat_template set to False or None. "
424+
"Therefore no chat template will be applied. Make sure this is an intended behavior."
425+
)
426+
return None
427+
428+
# Convert boolean chat_template to None to ensure compatibility with the adapted logic
429+
if isinstance(chat_template, bool):
430+
chat_template = None
431+
using_default_template = False
432+
433+
# First, handle the cases when the model has a dict of multiple templates
434+
template = self.tokenizer.chat_template or self.tokenizer.default_chat_template
435+
436+
if isinstance(template, dict):
437+
using_default_dict = self.tokenizer.chat_template is None
438+
439+
if chat_template is not None:
440+
if chat_template in template:
441+
selected_template = template[chat_template]
442+
if using_default_dict:
443+
using_default_template = True
444+
else:
445+
raise ValueError(
446+
f"The specified chat template '{chat_template}' is not available. "
447+
f"Available template names are {sorted(template.keys())}."
448+
)
449+
else:
450+
# If user didn't pass a chat template, use the default template from the dict
451+
if "default" in template:
452+
selected_template = template["default"]
453+
using_default_template = True
454+
else:
455+
raise ValueError(
456+
"This model has multiple chat templates with no default specified! Please either pass a chat "
457+
"template or the name of the template you wish to use to the `chat_template` argument. Available "
458+
f"template names are {sorted(template.keys())}."
459+
)
460+
461+
# Cases when the model has a single template or no template
462+
else:
463+
# priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template
464+
if isinstance(chat_template, str):
465+
eval_logger.warning(
466+
"Chat template name provided, but the tokenizer's chat template is not a dictionary. "
467+
"Using the tokenizer's chat template or the default template instead."
468+
)
469+
if self.tokenizer.chat_template is not None:
470+
selected_template = self.tokenizer.chat_template
471+
else:
472+
selected_template = self.tokenizer.default_chat_template
473+
using_default_template = True
474+
475+
if using_default_template:
476+
eval_logger.warning(
477+
"No chat template is set for this tokenizer, falling back to a default class-level template. This is "
478+
"very error-prone, because models are often trained with templates different from the class default! "
479+
"Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
480+
"point any code depending on them will stop working. We recommend setting a valid chat template before "
481+
"then to ensure that this model continues working without issues."
482+
)
483+
484+
return selected_template

lm_eval/evaluator.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -289,18 +289,12 @@ def _adjust_config(task_dict):
289289
if check_integrity:
290290
run_task_tests(task_list=tasks)
291291

292-
# hotfix: delete when chat_template fixed
293-
try:
294-
chat = lm.chat_template(apply_chat_template)
295-
except: # noqa: E722
296-
chat = None
297-
298292
if evaluation_tracker is not None:
299293
evaluation_tracker.general_config_tracker.log_experiment_args(
300294
model_source=model,
301295
model_args=model_args,
302296
system_instruction=system_instruction,
303-
chat_template=chat,
297+
chat_template=lm.chat_template(apply_chat_template),
304298
fewshot_as_multiturn=fewshot_as_multiturn,
305299
)
306300

lm_eval/models/api_models.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -225,14 +225,6 @@ def header(self) -> dict:
225225
"""Override this property to return the headers for the API request."""
226226
return {"Authorization": f"Bearer {self.api_key}"}
227227

228-
@property
229-
def chat_template(self) -> str:
230-
"""Must be defined for LM subclasses that implement Chat Templating.
231-
Should return the structure of the chat template applied to user/assistant messages.
232-
Only used for logging and reproducibility.
233-
"""
234-
return ""
235-
236228
@property
237229
def tokenizer_name(self) -> str:
238230
"""Must be defined for LM subclasses which implement Chat Templating.

lm_eval/models/huggingface.py

Lines changed: 0 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -438,98 +438,6 @@ def world_size(self):
438438
def tokenizer_name(self) -> str:
439439
return self.tokenizer.name_or_path.replace("/", "__")
440440

441-
def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]:
442-
"""
443-
Get the appropriate chat template for the model based on configuration and input.
444-
This method determines, and returns the correct chat template, ensuring reproducibility.
445-
446-
The template selection logic is adapted from the Transformers library's `apply_chat_template`
447-
method in the Tokenizer class. The original implementation can be found at:
448-
https://github.com/huggingface/transformers/blob/fc35907f95459d7a6c5281dfadd680b6f7b620e3/src/transformers/tokenization_utils_base.py#L1687
449-
450-
This method ensures that the right template is chosen based on the following:
451-
1. If the model's tokenizer has multiple templates:
452-
a. Use the specified template if it exists in the dictionary.
453-
b. Use the default template from the list if no specific template is provided.
454-
c. Raise an error if no default template exists and no specific template is provided.
455-
2. If the model's tokenizer has a single template or no template:
456-
a. Use the tokenizer's chat template if available.
457-
b. Fall back to the default chat template if no tokenizer chat template exists.
458-
459-
Args:
460-
chat_template (Union[bool, str]): Specifies the chat template to use.
461-
- If False or None, no template is applied.
462-
- If True, the default or only available template is used.
463-
- If a string, the template with the matching name is used.
464-
465-
Returns:
466-
Optional[str]: The selected chat template, or None if no template is applied.
467-
"""
468-
if chat_template is False or chat_template is None:
469-
eval_logger.warning(
470-
"model.chat_template was called with the chat_template set to False or None. "
471-
"Therefore no chat template will be applied. Make sure this is an intended behavior."
472-
)
473-
return None
474-
475-
# Convert boolean chat_template to None to ensure compatibility with the adapted logic
476-
if isinstance(chat_template, bool):
477-
chat_template = None
478-
using_default_template = False
479-
480-
# First, handle the cases when the model has a dict of multiple templates
481-
template = self.tokenizer.chat_template or self.tokenizer.default_chat_template
482-
483-
if isinstance(template, dict):
484-
using_default_dict = self.tokenizer.chat_template is None
485-
486-
if chat_template is not None:
487-
if chat_template in template:
488-
selected_template = template[chat_template]
489-
if using_default_dict:
490-
using_default_template = True
491-
else:
492-
raise ValueError(
493-
f"The specified chat template '{chat_template}' is not available. "
494-
f"Available template names are {sorted(template.keys())}."
495-
)
496-
else:
497-
# If user didn't pass a chat template, use the default template from the dict
498-
if "default" in template:
499-
selected_template = template["default"]
500-
using_default_template = True
501-
else:
502-
raise ValueError(
503-
"This model has multiple chat templates with no default specified! Please either pass a chat "
504-
"template or the name of the template you wish to use to the `chat_template` argument. Available "
505-
f"template names are {sorted(template.keys())}."
506-
)
507-
508-
# Cases when the model has a single template or no template
509-
else:
510-
# priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template
511-
if isinstance(chat_template, str):
512-
eval_logger.warning(
513-
"Chat template name provided, but the tokenizer's chat template is not a dictionary. "
514-
"Using the tokenizer's chat template or the default template instead."
515-
)
516-
if self.tokenizer.chat_template is not None:
517-
selected_template = self.tokenizer.chat_template
518-
else:
519-
selected_template = self.tokenizer.default_chat_template
520-
using_default_template = True
521-
522-
if using_default_template:
523-
eval_logger.warning(
524-
"No chat template is set for this tokenizer, falling back to a default class-level template. This is "
525-
"very error-prone, because models are often trained with templates different from the class default! "
526-
"Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
527-
"point any code depending on them will stop working. We recommend setting a valid chat template before "
528-
"then to ensure that this model continues working without issues."
529-
)
530-
531-
return selected_template
532-
533441
def _get_backend(
534442
self,
535443
config: Union[transformers.PretrainedConfig, transformers.AutoConfig],

lm_eval/models/openai_completions.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@ def _create_payload(
2929
) -> dict:
3030
if generate:
3131
gen_kwargs.pop("do_sample", False)
32-
max_tokens = gen_kwargs.pop("max_gen_toks", self._max_gen_toks)
32+
if "max_tokens" in gen_kwargs:
33+
max_tokens = gen_kwargs.pop("max_tokens")
34+
else:
35+
max_tokens = gen_kwargs.pop("max_gen_toks", self._max_gen_toks)
3336
temperature = gen_kwargs.pop("temperature", 0)
3437
stop = gen_kwargs.pop("until", ["<|endoftext|>"])
3538
return {
@@ -124,7 +127,10 @@ def _create_payload(
124127
**kwargs,
125128
) -> dict:
126129
gen_kwargs.pop("do_sample", False)
127-
max_tokens = gen_kwargs.pop("max_gen_toks", self._max_gen_toks)
130+
if "max_tokens" in gen_kwargs:
131+
max_tokens = gen_kwargs.pop("max_tokens")
132+
else:
133+
max_tokens = gen_kwargs.pop("max_gen_toks", self._max_gen_toks)
128134
temperature = gen_kwargs.pop("temperature", 0)
129135
stop = gen_kwargs.pop("until", ["<|endoftext|>"])
130136
if not isinstance(stop, (list, tuple)):
@@ -194,6 +200,9 @@ def loglikelihood(self, requests, **kwargs):
194200
), "Loglikelihood is not supported for gpt-3.5-turbo"
195201
return super().loglikelihood(requests, **kwargs)
196202

203+
def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]:
204+
return ""
205+
197206

198207
@register_model("openai-chat-completions")
199208
class OpenAIChatCompletion(LocalChatCompletion):

lm_eval/models/vllm_causallms.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -187,12 +187,6 @@ def apply_chat_template(self, chat_history: List[Dict[str, str]]) -> str:
187187
chat_history, tokenize=False, add_generation_prompt=True
188188
)
189189

190-
@property
191-
def chat_template(self) -> str:
192-
if self.tokenizer.chat_template is not None:
193-
return self.tokenizer.chat_template
194-
return self.tokenizer.default_chat_template
195-
196190
@property
197191
def tokenizer_name(self) -> str:
198192
return self.tokenizer.name_or_path.replace("/", "__")

0 commit comments

Comments (0)