diff --git a/helm-charts/support/values.yaml b/helm-charts/support/values.yaml
index 855c033d50..6451176f4b 100644
--- a/helm-charts/support/values.yaml
+++ b/helm-charts/support/values.yaml
@@ -281,6 +281,12 @@ grafana:
     # type Recreate is required since we attach a PVC that can only be used by
     # mounted for writing by one pod at the time.
     type: Recreate
+  readinessProbe:
+    # With one grafana pod replica, having a readiness probe fail is pointless.
+    # We ensure it won't fail before the livenessProbe that would restart the
+    # container.
+    failureThreshold: 1000
+    initialDelaySeconds: 1
 
   rbac:
     # namespaced makes us not get ClusterRole service accounts etc, and we do
@@ -297,8 +303,10 @@ grafana:
   # prometheus and grafana.
   #
   # Grafana's memory use seems to increase over time but seems reasonable to
-  # stay below 200Mi for years to come. Grafana's CPU use seems minuscule with
-  # peaks at up to 9m CPU from one user is browsing its dashboards.
+  # stay below 200Mi in general. Memory can peak when dashboards are updated,
+  # so the limit was raised to 400Mi after Grafana was seen getting OOMKilled.
+  # Grafana's CPU use seems minuscule, with peaks of up to 9m CPU while one
+  # user is browsing its dashboards.
   #
   # PromQL queries for CPU and memory use:
   # - CPU: sum(rate(container_cpu_usage_seconds_total{container="grafana", namespace="support"}[5m])) by (pod)
@@ -307,7 +315,7 @@ grafana:
   resources:
     limits:
       cpu: 100m
-      memory: 200Mi
+      memory: 400Mi
     requests:
       cpu: 10m
       memory: 200Mi