From 0b6b6eb57968cacdc9eecaa6a7c5301df7ef561c Mon Sep 17 00:00:00 2001
From: Kellen Swain
Date: Thu, 6 Feb 2025 16:56:30 +0000
Subject: [PATCH] Updates artifacts for v0.1.0 release

Signed-off-by: Kellen Swain
---
 pkg/README.md                      | 2 +-
 pkg/manifests/ext_proc.yaml        | 2 +-
 pkg/manifests/vllm/deployment.yaml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/README.md b/pkg/README.md
index 04ebfde2..925134bb 100644
--- a/pkg/README.md
+++ b/pkg/README.md
@@ -23,7 +23,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 1. **Install the Inference Extension CRDs:**
 
    ```sh
-   kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd
+   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.1.0/manifests.yaml
    ```
 
 1. **Deploy InferenceModel**
diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml
index b9b860dc..f2318e8d 100644
--- a/pkg/manifests/ext_proc.yaml
+++ b/pkg/manifests/ext_proc.yaml
@@ -71,7 +71,7 @@ spec:
     spec:
       containers:
       - name: inference-gateway-ext-proc
-        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
+        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.1.0
         args:
         - -poolName
         - "vllm-llama2-7b-pool"
diff --git a/pkg/manifests/vllm/deployment.yaml b/pkg/manifests/vllm/deployment.yaml
index 1f5073e9..341fa89d 100644
--- a/pkg/manifests/vllm/deployment.yaml
+++ b/pkg/manifests/vllm/deployment.yaml
@@ -14,7 +14,7 @@ spec:
     spec:
       containers:
       - name: lora
-        image: "vllm/vllm-openai:latest"
+        image: "vllm/vllm-openai:0.7.1"
         imagePullPolicy: Always
         command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
         args: