From a19e07c406d5990448ce5e2bd16b37474849db88 Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Tue, 4 Feb 2025 14:04:29 +0000
Subject: [PATCH 1/7] commit

---
 examples/model_configs/vllm_model_config.yaml |  5 +++++
 src/lighteval/main_vllm.py                    | 13 +++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)
 create mode 100644 examples/model_configs/vllm_model_config.yaml

diff --git a/examples/model_configs/vllm_model_config.yaml b/examples/model_configs/vllm_model_config.yaml
new file mode 100644
index 000000000..3d9054d18
--- /dev/null
+++ b/examples/model_configs/vllm_model_config.yaml
@@ -0,0 +1,5 @@
+model:
+  base_params:
+    model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
+  generation:
+    temperature: 0.3
diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py
index d063c3fa8..d62d85aec 100644
--- a/src/lighteval/main_vllm.py
+++ b/src/lighteval/main_vllm.py
@@ -63,9 +63,6 @@ def vllm(
     num_fewshot_seeds: Annotated[
         int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
-    load_responses_from_details_date_id: Annotated[
-        Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1)
-    ] = None,
     # === saving ===
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
@@ -127,18 +124,18 @@ def vllm(
         max_samples=max_samples,
         use_chat_template=use_chat_template,
         system_prompt=system_prompt,
-        load_responses_from_details_date_id=load_responses_from_details_date_id,
     )

     if model_args.endswith(".yaml"):
         with open(model_args, "r") as f:
             config = yaml.safe_load(f)["model"]
+        model_args = config["base_params"]["model_args"]
         generation_parameters = GenerationParameters.from_dict(config)
-        model_config = VLLMModelConfig(config, generation_parameters=generation_parameters)
-
     else:
-        model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")}
-        model_config = VLLMModelConfig(**model_args_dict)
+        generation_parameters = GenerationParameters()
+
+    model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")}
+    model_config = VLLMModelConfig(**model_args_dict, generation_parameters=generation_parameters)

     pipeline = Pipeline(
         tasks=tasks,

From 8e21cd5c1a5c840ab0a188e015665c80b7dc3c4e Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Tue, 4 Feb 2025 14:07:17 +0000
Subject: [PATCH 2/7] commit

---
 src/lighteval/main_vllm.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py
index d62d85aec..6d66faac8 100644
--- a/src/lighteval/main_vllm.py
+++ b/src/lighteval/main_vllm.py
@@ -63,6 +63,9 @@ def vllm(
     num_fewshot_seeds: Annotated[
         int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
+    load_responses_from_details_date_id: Annotated[
+        Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1)
+    ] = None,
     # === saving ===
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
@@ -124,6 +127,7 @@ def vllm(
         max_samples=max_samples,
         use_chat_template=use_chat_template,
         system_prompt=system_prompt,
+        load_responses_from_details_date_id=load_responses_from_details_date_id,
     )

     if model_args.endswith(".yaml"):
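A note on the parsing touched by [PATCH 1/7] above: after this change, the comma-separated `model_args` string (from the CLI or from the YAML's `base_params.model_args`) is always split into keyword arguments for `VLLMModelConfig`. A minimal standalone sketch of that dict comprehension follows; the `parse_model_args` wrapper name is illustrative only and does not exist in lighteval, which inlines the comprehension in `main_vllm.py`.

```python
# Standalone sketch of the model_args parsing used in PATCH 1/7 above.
# `parse_model_args` is an illustrative helper name, not part of lighteval.
def parse_model_args(model_args: str) -> dict:
    # "key=value" pairs become {"key": "value"}; bare keys (no "=") become True flags.
    return {k.split("=")[0]: (k.split("=")[1] if "=" in k else True) for k in model_args.split(",")}


if __name__ == "__main__":
    print(parse_model_args("pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16"))
    # {'pretrained': 'HuggingFaceTB/SmolLM-1.7B', 'revision': 'main', 'dtype': 'bfloat16'}
    print(parse_model_args("pretrained=HuggingFaceTB/SmolLM-1.7B,trust_remote_code"))
    # {'pretrained': 'HuggingFaceTB/SmolLM-1.7B', 'trust_remote_code': True}
```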
model_args.endswith(".yaml"): From 2802744a5da9de53ab69773ece6269ea4f51f928 Mon Sep 17 00:00:00 2001 From: Nathan Habib <30601243+NathanHB@users.noreply.github.com> Date: Tue, 4 Feb 2025 16:10:52 +0100 Subject: [PATCH 3/7] Update src/lighteval/main_vllm.py --- src/lighteval/main_vllm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py index 6d66faac8..d12281990 100644 --- a/src/lighteval/main_vllm.py +++ b/src/lighteval/main_vllm.py @@ -136,7 +136,6 @@ def vllm( model_args = config["base_params"]["model_args"] generation_parameters = GenerationParameters.from_dict(config) else: - generation_parameters = GenerationParameters() model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")} model_config = VLLMModelConfig(**model_args_dict, generation_parameters=generation_parameters) From b4c2d778fe4f7ba1e506a7058706c076e9e14b3a Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Wed, 5 Feb 2025 09:15:24 +0000 Subject: [PATCH 4/7] commit --- src/lighteval/main_vllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py index d12281990..6d66faac8 100644 --- a/src/lighteval/main_vllm.py +++ b/src/lighteval/main_vllm.py @@ -136,6 +136,7 @@ def vllm( model_args = config["base_params"]["model_args"] generation_parameters = GenerationParameters.from_dict(config) else: + generation_parameters = GenerationParameters() model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")} model_config = VLLMModelConfig(**model_args_dict, generation_parameters=generation_parameters) From 6ed696d84e07338316fbfecf575278456fe3af61 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Thu, 6 Feb 2025 13:39:38 +0000 Subject: [PATCH 5/7] change doc --- docs/source/use-vllm-as-backend.mdx | 43 +++++++++++++++++++---------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/docs/source/use-vllm-as-backend.mdx b/docs/source/use-vllm-as-backend.mdx index 787848c36..edf9ad1c0 100644 --- a/docs/source/use-vllm-as-backend.mdx +++ b/docs/source/use-vllm-as-backend.mdx @@ -29,20 +29,35 @@ lighteval vllm \ "leaderboard|truthfulqa:mc|0|0" ``` -Available arguments for `vllm` can be found in the `VLLMModelConfig`: - -- **pretrained** (str): HuggingFace Hub model ID name or the path to a pre-trained model to load. -- **gpu_memory_utilisation** (float): The fraction of GPU memory to use. -- **revision** (str): The revision of the model. -- **dtype** (str, None): The data type to use for the model. -- **tensor_parallel_size** (int): The number of tensor parallel units to use. -- **data_parallel_size** (int): The number of data parallel units to use. -- **max_model_length** (int): The maximum length of the model. -- **swap_space** (int): The CPU swap space size (GiB) per GPU. -- **seed** (int): The seed to use for the model. -- **trust_remote_code** (bool): Whether to trust remote code during model loading. -- **add_special_tokens** (bool): Whether to add special tokens to the input sequences. -- **multichoice_continuations_start_space** (bool): Whether to add a space at the start of each continuation in multichoice generation. +For more advanced configurations, you can use a config file for the model. +An example of a config file is shown below and can be found at `examples/model_configs/vllm_model_config.yaml`. 
+
+```bash
+lighteval vllm \
+    "examples/model_configs/vllm_model_config.yaml" \
+    "leaderboard|truthfulqa:mc|0|0"
+```
+
+```yaml
+model: # Model specific parameters
+  base_params:
+    model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16" # Model args that you would pass in the command line
+  generation: # Generation specific parameters
+    temperature: 0.3
+    early_stopping: 1
+    repetition_penalty: 1.0
+    frequency_penalty: 0.0
+    length_penalty: 0.0
+    presence_penalty: 0.0
+    max_new_tokens: 100
+    min_new_tokens: 1
+    seed: 42
+    stop_tokens: null
+    top_k: 0
+    min_p: 0.0
+    top_p: 0.9
+    truncate_prompt: false
+```

 > [!WARNING]
 > In the case of OOM issues, you might need to reduce the context size of the

From c75869bc28bfffead0be836837f5fcf4942bf15e Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Thu, 6 Feb 2025 14:30:33 +0000
Subject: [PATCH 6/7] change doc

---
 docs/source/use-vllm-as-backend.mdx           | 6 ------
 examples/model_configs/vllm_model_config.yaml | 7 +++++++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/docs/source/use-vllm-as-backend.mdx b/docs/source/use-vllm-as-backend.mdx
index edf9ad1c0..8a16ba7f3 100644
--- a/docs/source/use-vllm-as-backend.mdx
+++ b/docs/source/use-vllm-as-backend.mdx
@@ -44,19 +44,13 @@ model: # Model specific parameters
     model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16" # Model args that you would pass in the command line
   generation: # Generation specific parameters
     temperature: 0.3
-    early_stopping: 1
     repetition_penalty: 1.0
     frequency_penalty: 0.0
-    length_penalty: 0.0
     presence_penalty: 0.0
-    max_new_tokens: 100
-    min_new_tokens: 1
     seed: 42
-    stop_tokens: null
     top_k: 0
     min_p: 0.0
     top_p: 0.9
-    truncate_prompt: false
 ```

 > [!WARNING]
diff --git a/examples/model_configs/vllm_model_config.yaml b/examples/model_configs/vllm_model_config.yaml
index 3d9054d18..6287a3d23 100644
--- a/examples/model_configs/vllm_model_config.yaml
+++ b/examples/model_configs/vllm_model_config.yaml
@@ -3,3 +3,10 @@ model:
     model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
   generation:
     temperature: 0.3
+    repetition_penalty: 1.0
+    frequency_penalty: 0.0
+    presence_penalty: 0.0
+    seed: 42
+    top_k: 0
+    min_p: 0.0
+    top_p: 0.9

From 952589e17832a259afadba0c4888903768412e0b Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Thu, 6 Feb 2025 15:04:01 +0000
Subject: [PATCH 7/7] change doc

---
 docs/source/use-vllm-as-backend.mdx           |  2 ++
 examples/model_configs/vllm_model_config.yaml |  4 +++-
 src/lighteval/models/model_input.py           | 17 +++++++++++++++++
 src/lighteval/models/vllm/vllm_model.py       |  2 +-
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/docs/source/use-vllm-as-backend.mdx b/docs/source/use-vllm-as-backend.mdx
index 8a16ba7f3..76e997d94 100644
--- a/docs/source/use-vllm-as-backend.mdx
+++ b/docs/source/use-vllm-as-backend.mdx
@@ -29,6 +29,8 @@ lighteval vllm \
     "leaderboard|truthfulqa:mc|0|0"
 ```

+## Use a config file
+
 For more advanced configurations, you can use a config file for the model.
 An example of a config file is shown below and can be found at `examples/model_configs/vllm_model_config.yaml`.
diff --git a/examples/model_configs/vllm_model_config.yaml b/examples/model_configs/vllm_model_config.yaml
index 6287a3d23..be8941a66 100644
--- a/examples/model_configs/vllm_model_config.yaml
+++ b/examples/model_configs/vllm_model_config.yaml
@@ -7,6 +7,8 @@ model:
     frequency_penalty: 0.0
     presence_penalty: 0.0
     seed: 42
-    top_k: 0
+    top_k: -1
     min_p: 0.0
     top_p: 0.9
+    max_new_tokens: 100
+    stop_tokens: ["<EOS>", "<PAD>"]
diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py
index 04e35be17..30288363d 100644
--- a/src/lighteval/models/model_input.py
+++ b/src/lighteval/models/model_input.py
@@ -59,6 +59,23 @@ def from_dict(cls, config_dict: dict):
         """
         return GenerationParameters(**config_dict.get("generation", {}))

+    def to_vllm_dict(self) -> dict:
+        """Selects relevant generation and sampling parameters for vllm models.
+        Doc: https://docs.vllm.ai/en/v0.5.5/dev/sampling_params.html
+
+        Returns:
+            dict: The parameters to create a vllm.SamplingParams in the model config.
+        """
+        sampling_params_to_vllm_naming = {
+            "max_new_tokens": "max_tokens",
+            "min_new_tokens": "min_tokens",
+            "stop_tokens": "stop",
+        }
+
+        # Task specific sampling params to set in model: n, best_of, use_beam_search
+        # Generation specific params to set in model: logprobs, prompt_logprobs
+        return {sampling_params_to_vllm_naming.get(k, k): v for k, v in asdict(self).items() if v is not None}
+
     def to_vllm_openai_dict(self) -> dict:
         """Selects relevant generation and sampling parameters for vllm and openai models.
         Doc: https://docs.vllm.ai/en/v0.5.5/dev/sampling_params.html
diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py
index 3398f7218..314df3738 100644
--- a/src/lighteval/models/vllm/vllm_model.py
+++ b/src/lighteval/models/vllm/vllm_model.py
@@ -128,7 +128,7 @@ def __init__(
         self.precision = _get_dtype(config.dtype, config=self._config)

         self.model_info = ModelInfo(model_name=self.model_name, model_sha=self.model_sha)
-        self.sampling_params = SamplingParams(**config.generation_parameters.to_vllm_openai_dict())
+        self.sampling_params = SamplingParams(**config.generation_parameters.to_vllm_dict())
         self.pairwise_tokenization = config.pairwise_tokenization

     @property
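For reference, the renaming performed by the new `GenerationParameters.to_vllm_dict` in [PATCH 7/7] can be exercised in isolation with the sketch below. The trimmed-down dataclass is a hypothetical stand-in with only a few fields, not the real lighteval `GenerationParameters`; the key mapping and None-filtering mirror the patch above.

```python
# Self-contained sketch of the key renaming / None filtering done by
# GenerationParameters.to_vllm_dict in PATCH 7/7. The dataclass here is a
# simplified stand-in, not the actual lighteval class.
from dataclasses import asdict, dataclass
from typing import Optional


@dataclass
class GenerationParametersSketch:
    temperature: Optional[float] = None
    max_new_tokens: Optional[int] = None
    min_new_tokens: Optional[int] = None
    stop_tokens: Optional[list] = None

    def to_vllm_dict(self) -> dict:
        # lighteval names -> vllm.SamplingParams names
        sampling_params_to_vllm_naming = {
            "max_new_tokens": "max_tokens",
            "min_new_tokens": "min_tokens",
            "stop_tokens": "stop",
        }
        # Unset (None) fields are dropped so SamplingParams keeps its own defaults.
        return {sampling_params_to_vllm_naming.get(k, k): v for k, v in asdict(self).items() if v is not None}


if __name__ == "__main__":
    params = GenerationParametersSketch(temperature=0.3, max_new_tokens=100, stop_tokens=["<EOS>", "<PAD>"])
    print(params.to_vllm_dict())
    # {'temperature': 0.3, 'max_tokens': 100, 'stop': ['<EOS>', '<PAD>']}
```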