Skip to content

Commit

Permalink
Add custom metrics for Triton server scaling
Browse files Browse the repository at this point in the history
  • Loading branch information
ratnopamc committed Jul 14, 2024
1 parent 818bd03 commit 7b13999
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 1 deletion.
1 change: 1 addition & 0 deletions ai-ml/nvidia-triton-server/addons.tf
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ module "eks_blueprints_addons" {
})
]
chart_version = "48.1.1"
namespace = "monitoring"
set_sensitive = [
{
name = "grafana.adminPassword"
Expand Down
11 changes: 11 additions & 0 deletions ai-ml/nvidia-triton-server/helm-values/prometheus-adapter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,14 @@ rules:
matches: "num_requests_running"
as: ""
metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
# Expose Triton's inference queue time as a per-pod custom metric so the HPA
# can consume it via the custom.metrics.k8s.io API.
# Selects only series that carry both a namespace and a pod label.
- seriesQuery: 'nv_inference_queue_duration_us{namespace!="", pod!=""}'
  resources:
    # Map Prometheus labels onto Kubernetes API resources so the adapter can
    # answer namespaced, per-pod metric queries.
    overrides:
      namespace:
        resource: "namespace"
      pod:
        resource: "pod"
  name:
    # Rename the exposed metric: the query below divides by 1000, converting
    # microseconds to milliseconds, so the new name reflects ms units.
    matches: "nv_inference_queue_duration_us"
    as: "nv_inference_queue_duration_ms"
  # Per-group average of the 1-minute rate, scaled from us to ms.
  # <<.LabelMatchers>> / <<.GroupBy>> are filled in by the adapter at query time.
  metricsQuery: 'avg(rate(nv_inference_queue_duration_us{<<.LabelMatchers>>}[1m])/1000) by (<<.GroupBy>>)'
12 changes: 11 additions & 1 deletion ai-ml/nvidia-triton-server/nvidia-triton-server.tf
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,17 @@ module "triton_server_vllm" {
nodeSelector:
NodeGroupType: g5-gpu-karpenter
type: karpenter
# HPA settings passed to the Triton Helm chart: scale replicas on the
# per-pod inference queue latency published by prometheus-adapter.
hpa:
  minReplicas: 1
  maxReplicas: 5
  metrics:
    # Pods-type metric: averaged across all pods targeted by the HPA.
    - type: Pods
      pods:
        metric:
          # Must match the "as" rename in helm-values/prometheus-adapter.yaml.
          name: nv_inference_queue_duration_ms
        target:
          type: AverageValue
          # Target average queue time per pod; presumably milliseconds, given
          # the metric name — NOTE(review): confirm the intended unit/threshold.
          averageValue: 10
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
Expand Down

0 comments on commit 7b13999

Please sign in to comment.