From a19e07c406d5990448ce5e2bd16b37474849db88 Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Tue, 4 Feb 2025 14:04:29 +0000
Subject: [PATCH 1/7] commit

---
 examples/model_configs/vllm_model_config.yaml |  5 +++++
 src/lighteval/main_vllm.py                    | 13 +++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)
 create mode 100644 examples/model_configs/vllm_model_config.yaml

diff --git a/examples/model_configs/vllm_model_config.yaml b/examples/model_configs/vllm_model_config.yaml
new file mode 100644
index 000000000..3d9054d18
--- /dev/null
+++ b/examples/model_configs/vllm_model_config.yaml
@@ -0,0 +1,5 @@
+model:
+  base_params:
+    model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
+  generation:
+    temperature: 0.3
diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py
index d063c3fa8..d62d85aec 100644
--- a/src/lighteval/main_vllm.py
+++ b/src/lighteval/main_vllm.py
@@ -63,9 +63,6 @@ def vllm(
     num_fewshot_seeds: Annotated[
         int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
-    load_responses_from_details_date_id: Annotated[
-        Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1)
-    ] = None,
     # === saving ===
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
@@ -127,18 +124,18 @@ def vllm(
         max_samples=max_samples,
         use_chat_template=use_chat_template,
         system_prompt=system_prompt,
-        load_responses_from_details_date_id=load_responses_from_details_date_id,
     )

     if model_args.endswith(".yaml"):
         with open(model_args, "r") as f:
             config = yaml.safe_load(f)["model"]
+        model_args = config["base_params"]["model_args"]
         generation_parameters = GenerationParameters.from_dict(config)
-        model_config = VLLMModelConfig(config, generation_parameters=generation_parameters)
-
     else:
-        model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")}
-        model_config = VLLMModelConfig(**model_args_dict)
+        generation_parameters = GenerationParameters()
+
+    model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")}
+    model_config = VLLMModelConfig(**model_args_dict, generation_parameters=generation_parameters)

     pipeline = Pipeline(
         tasks=tasks,

From 8e21cd5c1a5c840ab0a188e015665c80b7dc3c4e Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Tue, 4 Feb 2025 14:07:17 +0000
Subject: [PATCH 2/7] commit

---
 src/lighteval/main_vllm.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py
index d62d85aec..6d66faac8 100644
--- a/src/lighteval/main_vllm.py
+++ b/src/lighteval/main_vllm.py
@@ -63,6 +63,9 @@ def vllm(
     num_fewshot_seeds: Annotated[
         int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
+    load_responses_from_details_date_id: Annotated[
+        Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1)
+    ] = None,
     # === saving ===
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
@@ -124,6 +127,7 @@ def vllm(
         max_samples=max_samples,
         use_chat_template=use_chat_template,
         system_prompt=system_prompt,
+        load_responses_from_details_date_id=load_responses_from_details_date_id,
     )

     if model_args.endswith(".yaml"):
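A note on the parsing touched by [PATCH 1/7] above: after this change, the comma-separated `model_args` string (from the CLI or from the YAML's `base_params.model_args`) is always split into keyword arguments for `VLLMModelConfig`. A minimal standalone sketch of that dict comprehension follows; the `parse_model_args` wrapper name is illustrative only and does not exist in lighteval, which inlines the comprehension in `main_vllm.py`.

```python
# Standalone sketch of the model_args parsing used in PATCH 1/7 above.
# `parse_model_args` is an illustrative helper name, not part of lighteval.
def parse_model_args(model_args: str) -> dict:
    # "key=value" pairs become {"key": "value"}; bare keys (no "=") become True flags.
    return {k.split("=")[0]: (k.split("=")[1] if "=" in k else True) for k in model_args.split(",")}


if __name__ == "__main__":
    print(parse_model_args("pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16"))
    # {'pretrained': 'HuggingFaceTB/SmolLM-1.7B', 'revision': 'main', 'dtype': 'bfloat16'}
    print(parse_model_args("pretrained=HuggingFaceTB/SmolLM-1.7B,trust_remote_code"))
    # {'pretrained': 'HuggingFaceTB/SmolLM-1.7B', 'trust_remote_code': True}
```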
model_args.endswith(".yaml"): From 2802744a5da9de53ab69773ece6269ea4f51f928 Mon Sep 17 00:00:00 2001 From: Nathan Habib <30601243+NathanHB@users.noreply.github.com> Date: Tue, 4 Feb 2025 16:10:52 +0100 Subject: [PATCH 3/7] Update src/lighteval/main_vllm.py --- src/lighteval/main_vllm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py index 6d66faac8..d12281990 100644 --- a/src/lighteval/main_vllm.py +++ b/src/lighteval/main_vllm.py @@ -136,7 +136,6 @@ def vllm( model_args = config["base_params"]["model_args"] generation_parameters = GenerationParameters.from_dict(config) else: - generation_parameters = GenerationParameters() model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")} model_config = VLLMModelConfig(**model_args_dict, generation_parameters=generation_parameters) From b4c2d778fe4f7ba1e506a7058706c076e9e14b3a Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Wed, 5 Feb 2025 09:15:24 +0000 Subject: [PATCH 4/7] commit --- src/lighteval/main_vllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py index d12281990..6d66faac8 100644 --- a/src/lighteval/main_vllm.py +++ b/src/lighteval/main_vllm.py @@ -136,6 +136,7 @@ def vllm( model_args = config["base_params"]["model_args"] generation_parameters = GenerationParameters.from_dict(config) else: + generation_parameters = GenerationParameters() model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")} model_config = VLLMModelConfig(**model_args_dict, generation_parameters=generation_parameters) From 6ed696d84e07338316fbfecf575278456fe3af61 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Thu, 6 Feb 2025 13:39:38 +0000 Subject: [PATCH 5/7] change doc --- docs/source/use-vllm-as-backend.mdx | 43 +++++++++++++++++++---------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/docs/source/use-vllm-as-backend.mdx b/docs/source/use-vllm-as-backend.mdx index 787848c36..edf9ad1c0 100644 --- a/docs/source/use-vllm-as-backend.mdx +++ b/docs/source/use-vllm-as-backend.mdx @@ -29,20 +29,35 @@ lighteval vllm \ "leaderboard|truthfulqa:mc|0|0" ``` -Available arguments for `vllm` can be found in the `VLLMModelConfig`: - -- **pretrained** (str): HuggingFace Hub model ID name or the path to a pre-trained model to load. -- **gpu_memory_utilisation** (float): The fraction of GPU memory to use. -- **revision** (str): The revision of the model. -- **dtype** (str, None): The data type to use for the model. -- **tensor_parallel_size** (int): The number of tensor parallel units to use. -- **data_parallel_size** (int): The number of data parallel units to use. -- **max_model_length** (int): The maximum length of the model. -- **swap_space** (int): The CPU swap space size (GiB) per GPU. -- **seed** (int): The seed to use for the model. -- **trust_remote_code** (bool): Whether to trust remote code during model loading. -- **add_special_tokens** (bool): Whether to add special tokens to the input sequences. -- **multichoice_continuations_start_space** (bool): Whether to add a space at the start of each continuation in multichoice generation. +For more advanced configurations, you can use a config file for the model. +An example of a config file is shown below and can be found at `examples/model_configs/vllm_model_config.yaml`. 
+
+```bash
+lighteval vllm \
+    "examples/model_configs/vllm_model_config.yaml" \
+    "leaderboard|truthfulqa:mc|0|0"
+```
+
+```yaml
+model: # Model specific parameters
+  base_params:
+    model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16" # Model args that you would pass in the command line
+  generation: # Generation specific parameters
+    temperature: 0.3
+    early_stopping: 1
+    repetition_penalty: 1.0
+    frequency_penalty: 0.0
+    length_penalty: 0.0
+    presence_penalty: 0.0
+    max_new_tokens: 100
+    min_new_tokens: 1
+    seed: 42
+    stop_tokens: null
+    top_k: 0
+    min_p: 0.0
+    top_p: 0.9
+    truncate_prompt: false
+```

 > [!WARNING]
 > In the case of OOM issues, you might need to reduce the context size of the

From c75869bc28bfffead0be836837f5fcf4942bf15e Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Thu, 6 Feb 2025 14:30:33 +0000
Subject: [PATCH 6/7] change doc

---
 docs/source/use-vllm-as-backend.mdx           | 6 ------
 examples/model_configs/vllm_model_config.yaml | 7 +++++++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/docs/source/use-vllm-as-backend.mdx b/docs/source/use-vllm-as-backend.mdx
index edf9ad1c0..8a16ba7f3 100644
--- a/docs/source/use-vllm-as-backend.mdx
+++ b/docs/source/use-vllm-as-backend.mdx
@@ -44,19 +44,13 @@ model: # Model specific parameters
     model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16" # Model args that you would pass in the command line
   generation: # Generation specific parameters
     temperature: 0.3
-    early_stopping: 1
     repetition_penalty: 1.0
     frequency_penalty: 0.0
-    length_penalty: 0.0
     presence_penalty: 0.0
-    max_new_tokens: 100
-    min_new_tokens: 1
     seed: 42
-    stop_tokens: null
     top_k: 0
     min_p: 0.0
     top_p: 0.9
-    truncate_prompt: false
 ```

 > [!WARNING]
diff --git a/examples/model_configs/vllm_model_config.yaml b/examples/model_configs/vllm_model_config.yaml
index 3d9054d18..6287a3d23 100644
--- a/examples/model_configs/vllm_model_config.yaml
+++ b/examples/model_configs/vllm_model_config.yaml
@@ -3,3 +3,10 @@ model:
     model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,dtype=bfloat16" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
   generation:
     temperature: 0.3
+    repetition_penalty: 1.0
+    frequency_penalty: 0.0
+    presence_penalty: 0.0
+    seed: 42
+    top_k: 0
+    min_p: 0.0
+    top_p: 0.9

From 952589e17832a259afadba0c4888903768412e0b Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Thu, 6 Feb 2025 15:04:01 +0000
Subject: [PATCH 7/7] change doc

---
 docs/source/use-vllm-as-backend.mdx           |  2 ++
 examples/model_configs/vllm_model_config.yaml |  4 +++-
 src/lighteval/models/model_input.py           | 17 +++++++++++++++++
 src/lighteval/models/vllm/vllm_model.py       |  2 +-
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/docs/source/use-vllm-as-backend.mdx b/docs/source/use-vllm-as-backend.mdx
index 8a16ba7f3..76e997d94 100644
--- a/docs/source/use-vllm-as-backend.mdx
+++ b/docs/source/use-vllm-as-backend.mdx
@@ -29,6 +29,8 @@ lighteval vllm \
     "leaderboard|truthfulqa:mc|0|0"
 ```

+## Use a config file
+
 For more advanced configurations, you can use a config file for the model.
 An example of a config file is shown below and can be found at `examples/model_configs/vllm_model_config.yaml`.
diff --git a/examples/model_configs/vllm_model_config.yaml b/examples/model_configs/vllm_model_config.yaml
index 6287a3d23..be8941a66 100644
--- a/examples/model_configs/vllm_model_config.yaml
+++ b/examples/model_configs/vllm_model_config.yaml
@@ -7,6 +7,8 @@ model:
     frequency_penalty: 0.0
     presence_penalty: 0.0
     seed: 42
-    top_k: 0
+    top_k: -1
     min_p: 0.0
     top_p: 0.9
+    max_new_tokens: 100
+    stop_tokens: ["<EOS>", "<PAD>"]
diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py
index 04e35be17..30288363d 100644
--- a/src/lighteval/models/model_input.py
+++ b/src/lighteval/models/model_input.py
@@ -59,6 +59,23 @@ def from_dict(cls, config_dict: dict):
         """
         return GenerationParameters(**config_dict.get("generation", {}))

+    def to_vllm_dict(self) -> dict:
+        """Selects relevant generation and sampling parameters for vllm models.
+        Doc: https://docs.vllm.ai/en/v0.5.5/dev/sampling_params.html
+
+        Returns:
+            dict: The parameters to create a vllm.SamplingParams in the model config.
+        """
+        sampling_params_to_vllm_naming = {
+            "max_new_tokens": "max_tokens",
+            "min_new_tokens": "min_tokens",
+            "stop_tokens": "stop",
+        }
+
+        # Task specific sampling params to set in model: n, best_of, use_beam_search
+        # Generation specific params to set in model: logprobs, prompt_logprobs
+        return {sampling_params_to_vllm_naming.get(k, k): v for k, v in asdict(self).items() if v is not None}
+
     def to_vllm_openai_dict(self) -> dict:
         """Selects relevant generation and sampling parameters for vllm and openai models.
         Doc: https://docs.vllm.ai/en/v0.5.5/dev/sampling_params.html
diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py
index 3398f7218..314df3738 100644
--- a/src/lighteval/models/vllm/vllm_model.py
+++ b/src/lighteval/models/vllm/vllm_model.py
@@ -128,7 +128,7 @@ def __init__(
         self.precision = _get_dtype(config.dtype, config=self._config)

         self.model_info = ModelInfo(model_name=self.model_name, model_sha=self.model_sha)
-        self.sampling_params = SamplingParams(**config.generation_parameters.to_vllm_openai_dict())
+        self.sampling_params = SamplingParams(**config.generation_parameters.to_vllm_dict())
         self.pairwise_tokenization = config.pairwise_tokenization

     @property
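For reference, the renaming performed by the new `GenerationParameters.to_vllm_dict` in [PATCH 7/7] can be exercised in isolation with the sketch below. The trimmed-down dataclass is a hypothetical stand-in with only a few fields, not the real lighteval `GenerationParameters`; the key mapping and None-filtering mirror the patch above.

```python
# Self-contained sketch of the key renaming / None filtering done by
# GenerationParameters.to_vllm_dict in PATCH 7/7. The dataclass here is a
# simplified stand-in, not the actual lighteval class.
from dataclasses import asdict, dataclass
from typing import Optional


@dataclass
class GenerationParametersSketch:
    temperature: Optional[float] = None
    max_new_tokens: Optional[int] = None
    min_new_tokens: Optional[int] = None
    stop_tokens: Optional[list] = None

    def to_vllm_dict(self) -> dict:
        # lighteval names -> vllm.SamplingParams names
        sampling_params_to_vllm_naming = {
            "max_new_tokens": "max_tokens",
            "min_new_tokens": "min_tokens",
            "stop_tokens": "stop",
        }
        # Unset (None) fields are dropped so SamplingParams keeps its own defaults.
        return {sampling_params_to_vllm_naming.get(k, k): v for k, v in asdict(self).items() if v is not None}


if __name__ == "__main__":
    params = GenerationParametersSketch(temperature=0.3, max_new_tokens=100, stop_tokens=["<EOS>", "<PAD>"])
    print(params.to_vllm_dict())
    # {'temperature': 0.3, 'max_tokens': 100, 'stop': ['<EOS>', '<PAD>']}
```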