You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
* Delete type and rename model in endpoint docs
* Explain to pass either model_name or endpoint_name+reuse_existing
* Fix legacy instance type and size in docs
* Minor fix
endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters
32
-
model: "meta-llama/Llama-2-7b-hf"
30
+
# Pass either model_name, or endpoint_name together with reuse_existing set to true
31
+
# endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters
32
+
# reuse_existing: true # defaults to false; if true, ignore all params in instance, and don't delete the endpoint after evaluation
33
+
model_name: "meta-llama/Llama-2-7b-hf"
33
34
revision: "main"
34
35
dtype: "float16" # can be any of "awq", "eetq", "gptq", "4bit" or "8bit" (will use bitsandbytes), "bfloat16" or "float16"
35
-
reuse_existing: false # if true, ignore all params in instance, and don't delete the endpoint after evaluation
36
36
instance:
37
37
accelerator: "gpu"
38
38
region: "eu-west-1"
39
39
vendor: "aws"
40
-
instance_size: "medium"
41
-
instance_type: "g5.2xlarge"
40
+
instance_type: "nvidia-a10g"
41
+
instance_size: "x1"
42
42
framework: "pytorch"
43
43
endpoint_type: "protected"
44
-
namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace
44
+
namespace: null # The namespace under which to launch the endpoint. Defaults to the current user's namespace
45
45
image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.
46
46
env_vars:
47
47
null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048`
0 commit comments