-
Notifications
You must be signed in to change notification settings - Fork 171
/
Copy pathbroker-alerts.yml
74 lines (66 loc) · 2.85 KB
/
broker-alerts.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Prometheus alerting rules for Kafka brokers.
#
# Every rule in this group is labelled `severity: page`. The controller and
# partition rules aggregate their metric with `sum by (env)`, so they fire
# once per `env` label value; the broker idle-percent rules are evaluated per
# series and reference the `instance` label in their description.
groups:
  - name: brokers
    rules:
      # Page when ActiveControllerCount != 1 for more than 10s.
      - alert: 'ActiveControllerCount != 1'
        expr: sum by (env) (kafka_controller_kafkacontroller_activecontrollercount) != 1
        for: 10s
        labels:
          severity: page
        annotations:
          summary: "Active controller count for env {{ $labels.env }}"
          description: "Active controller count for env {{ $labels.env }} is {{ $value }}. "

      # Page when OfflinePartitionsCount > 0 for more than 10s.
      - alert: 'OfflinePartitionCount > 0'
        expr: sum by (env) (kafka_controller_kafkacontroller_offlinepartitionscount) > 0
        for: 10s
        labels:
          severity: page
        annotations:
          summary: "Offline partitions for env {{ $labels.env }}"
          description: "Offline partitions count for env {{ $labels.env }} is {{ $value }}. "

      # Page when UncleanLeaderElectionsPerSec > 0 for more than 1m.
      - alert: 'UncleanLeaderElectionsPerSec > 0'
        expr: sum by (env) (kafka_controller_controllerstats_uncleanleaderelectionspersec) > 0
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "UncleanLeaderElectionsPerSec for env {{ $labels.env }}"
          description: "UncleanLeaderElectionsPerSec for env {{ $labels.env }} is {{ $value }}. "

      # Page when UnderReplicatedPartitions > 0 for more than 1m.
      - alert: 'UnderReplicatedPartitions > 0'
        expr: sum by (env) (kafka_server_replicamanager_underreplicatedpartitions) > 0
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "UnderReplicatedPartitions for env {{ $labels.env }}"
          description: "UnderReplicatedPartitions for env {{ $labels.env }} is {{ $value }}. "

      # Page when UnderMinIsrPartitionCount > 0 for more than 1m.
      - alert: 'UnderMinIsrPartitionCount > 0'
        expr: sum by (env) (kafka_cluster_partition_underminisr) > 0
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "UnderMinIsrPartitionCount for env {{ $labels.env }}"
          description: "UnderMinIsrPartitionCount for env {{ $labels.env }} is {{ $value }}. "

      # Page when the request-handler idle ratio stays below 0.4 (40%) for
      # more than 1m. The threshold matches the value stated in the alert name.
      - alert: 'Broker IO Activity < 0.4'
        expr: kafka_server_kafkarequesthandlerpool_controlplanerequesthandleravgidlepercent < 0.4
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "Broker IO Activity < 0.4 for env {{ $labels.env }}"
          # `$labels` (plural) is the template variable Prometheus provides.
          description: "Broker IO Activity for env {{ $labels.env }} and broker {{ $labels.instance }} is {{ $value }}. "

      # Page when the request-handler idle ratio stays below 40% for more
      # than 1m.
      # NOTE(review): this rule evaluates the same expression as
      # 'Broker IO Activity < 0.4' above. It may have been meant to watch a
      # network-processor idle metric ("Broker Network Activity") instead;
      # confirm the intended metric and alert name before changing either.
      - alert: 'Broker IO Activity < 40%'
        expr: kafka_server_kafkarequesthandlerpool_controlplanerequesthandleravgidlepercent < 0.4
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "Broker IO Activity < 40% for env {{ $labels.env }}"
          # `$labels` (plural) is the template variable Prometheus provides.
          description: "Broker IO Activity for env {{ $labels.env }} and broker {{ $labels.instance }} is {{ $value }}. "