From c57303633f006cb4ef42380ef82896b802083e70 Mon Sep 17 00:00:00 2001 From: alanty <45295374+alanty@users.noreply.github.com> Date: Fri, 31 Jan 2025 11:53:03 -0800 Subject: [PATCH] feat: Enable spark-operator prometheus metrics (#737) --- .../terraform/spark-k8s-operator/addons.tf | 18 ++++++++++++++++++ .../kube-prometheus-amp-enable.yaml | 5 +++++ .../helm-values/kube-prometheus.yaml | 5 +++++ 3 files changed, 28 insertions(+) diff --git a/analytics/terraform/spark-k8s-operator/addons.tf b/analytics/terraform/spark-k8s-operator/addons.tf index a74683682..22de17370 100644 --- a/analytics/terraform/spark-k8s-operator/addons.tf +++ b/analytics/terraform/spark-k8s-operator/addons.tf @@ -386,6 +386,24 @@ module "eks_data_addons" { rbac: # -- Specifies whether to create RBAC resources for the controller. create: false + prometheus: + metrics: + enable: true + port: 8080 + portName: metrics + endpoint: /metrics + prefix: "" + # Prometheus pod monitor for controller pods + podMonitor: + # -- Specifies whether to create pod monitor. + create: true + labels: {} + # -- The label to use to retrieve the job name from + jobLabel: spark-operator-podmonitor + # -- Prometheus metrics endpoint properties. 
`metrics.portName` will be used as a port + podMetricsEndpoint: + scheme: http + interval: 5s EOT ] } diff --git a/analytics/terraform/spark-k8s-operator/helm-values/kube-prometheus-amp-enable.yaml b/analytics/terraform/spark-k8s-operator/helm-values/kube-prometheus-amp-enable.yaml index 112ea39ef..23d34ced5 100644 --- a/analytics/terraform/spark-k8s-operator/helm-values/kube-prometheus-amp-enable.yaml +++ b/analytics/terraform/spark-k8s-operator/helm-values/kube-prometheus-amp-enable.yaml @@ -28,6 +28,11 @@ prometheus: resources: requests: storage: 50Gi + # Select all PodMonitors, Probes, rules and ServiceMonitors, not only those labelled by this Helm release + podMonitorSelectorNilUsesHelmValues: false + probeSelectorNilUsesHelmValues: false + ruleSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: false # Scrape Cost metrics for Karpenter and Yunikorn add-ons additionalScrapeConfigs: - job_name: yunikorn diff --git a/analytics/terraform/spark-k8s-operator/helm-values/kube-prometheus.yaml b/analytics/terraform/spark-k8s-operator/helm-values/kube-prometheus.yaml index 899292abc..661d4a193 100644 --- a/analytics/terraform/spark-k8s-operator/helm-values/kube-prometheus.yaml +++ b/analytics/terraform/spark-k8s-operator/helm-values/kube-prometheus.yaml @@ -15,6 +15,11 @@ prometheus: resources: requests: storage: 50Gi + # Select all PodMonitors, Probes, rules and ServiceMonitors, not only those labelled by this Helm release + podMonitorSelectorNilUsesHelmValues: false + probeSelectorNilUsesHelmValues: false + ruleSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: false # Scrape Cost metrics for Karpenter and Yunikorn add-ons additionalScrapeConfigs: - job_name: yunikorn