Commit 075a266

Update docs about inference endpoints (#432)
* Delete type and rename model in endpoint docs (see the before/after sketch below)
* Explain to pass either model_name, or endpoint_name plus reuse_existing
* Fix legacy instance type and size in docs
* Minor fix
Parent: c0966cf
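
Concretely, the docs now drop the `type` key and take the checkpoint under `model_name` (or, alternatively, reference an existing endpoint via `endpoint_name` plus `reuse_existing`). A minimal before/after sketch, abridged from the diffs below:

```yaml
# Before (legacy form shown in the docs)
model:
  type: "endpoint"
  base_params:
    model: "meta-llama/Llama-2-7b-hf"

# After: no type key; pass either model_name, or endpoint_name and reuse_existing
model:
  base_params:
    model_name: "meta-llama/Llama-2-7b-hf"
```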

3 files changed: +13, -12 lines


docs/source/evaluate-the-model-on-a-server-or-container.mdx

Lines changed: 7 additions & 8 deletions
@@ -26,22 +26,22 @@ __configuration file example:__
 
 ```yaml
 model:
-  type: "endpoint"
   base_params:
-    endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters
-    model: "meta-llama/Llama-2-7b-hf"
+    # Pass either model_name, or endpoint_name and true reuse_existing
+    # endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters
+    # reuse_existing: true # defaults to false; if true, ignore all params in instance, and don't delete the endpoint after evaluation
+    model_name: "meta-llama/Llama-2-7b-hf"
     revision: "main"
     dtype: "float16" # can be any of "awq", "eetq", "gptq", "4bit' or "8bit" (will use bitsandbytes), "bfloat16" or "float16"
-    reuse_existing: false # if true, ignore all params in instance, and don't delete the endpoint after evaluation
   instance:
     accelerator: "gpu"
     region: "eu-west-1"
     vendor: "aws"
-    instance_size: "medium"
-    instance_type: "g5.2xlarge"
+    instance_type: "nvidia-a10g"
+    instance_size: "x1"
     framework: "pytorch"
     endpoint_type: "protected"
-    namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace
+    namespace: null # The namespace under which to launch the endpoint. Defaults to the current user's namespace
     image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.
     env_vars:
       null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048`
@@ -58,7 +58,6 @@ __configuration file example:__
 
 ```yaml
 model:
-  type: "tgi"
   instance:
     inference_server_address: ""
     inference_server_auth: null
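
For reference, the alternative form described by the new comments — reusing an already-deployed Inference Endpoint instead of creating one from `model_name` — would look roughly like the sketch below. This is an illustration assembled from the commented-out lines above, not text from the commit itself; the endpoint name is only an example.

```yaml
model:
  base_params:
    # Point lighteval at an existing endpoint instead of deploying a new one
    endpoint_name: "llama-2-7B-lighteval"  # needs to be lower case without special characters
    reuse_existing: true  # instance params are ignored and the endpoint is not deleted after evaluation
    revision: "main"
    dtype: "float16"
```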

examples/model_configs/endpoint_model.yaml

Lines changed: 5 additions & 3 deletions
@@ -1,15 +1,17 @@
 model:
   base_params:
-    model_name: "meta-llama/Llama-2-7b-hf" # the model name or the endpoint name if reuse_existing is true
+    # Pass either model_name, or endpoint_name and true reuse_existing
+    # endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters
+    # reuse_existing: true # defaults to false; if true, ignore all params in instance, and don't delete the endpoint after evaluation
+    model_name: "meta-llama/Llama-2-7b-hf"
     revision: "main"
     dtype: "float16" # can be any of "awq", "eetq", "gptq", "4bit' or "8bit" (will use bitsandbytes), "bfloat16" or "float16"
-    reuse_existing: false # if true, ignore all params in instance, and don't delete the endpoint after evaluation
   instance:
     accelerator: "gpu"
     region: "eu-west-1"
     vendor: "aws"
-    instance_size: "x1"
     instance_type: "nvidia-a10g"
+    instance_size: "x1"
     framework: "pytorch"
     endpoint_type: "protected"
     namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace

src/lighteval/models/endpoints/endpoint_model.py

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ class InferenceEndpointModelConfig:
     endpoint_type: str = "protected"
     add_special_tokens: bool = True
     revision: str = "main"
-    namespace: str = None # The namespace under which to launch the endopint. Defaults to the current user's namespace
+    namespace: str = None # The namespace under which to launch the endpoint. Defaults to the current user's namespace
     image_url: str = None
     env_vars: dict = None
