Skip to content

Commit d9c4995

Browse files
authored
Merge pull request #15 from MalloZup/master
Add corosync metrics (ringFailures and quorum)
2 parents 163b4f5 + 81a170f commit d9c4995

File tree

6 files changed

+334
-12
lines changed

6 files changed

+334
-12
lines changed

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
default: build
22

3-
build: fmt-check vet-check
3+
build: fmt-check vet-check test
44
go build .
55

66
install:
@@ -11,7 +11,8 @@ vet-check:
1111

1212
fmt-check:
1313
go fmt .
14-
14+
test:
15+
go test
1516
# This deploys the binary to a cluster node in devel mode, on port :9002.
1617
# You need to change the IP address in the script.
1718
# TODO: (In future we can add an arg var..)

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# ha_cluster_exporter
22

3+
[![Build Status](https://travis-ci.org/ClusterLabs/ha_cluster_exporter.svg?branch=master)](https://travis-ci.org/ClusterLabs/ha_cluster_exporter)
4+
5+
36
This prometheus exporter is used to serve metrics for pacemaker https://github.com/ClusterLabs/pacemaker
47

58
It should run on one node of the cluster, or on both.
@@ -8,7 +11,7 @@ It should run inside a node of the cluster or both.
811

912
- expose cluster node and resource metrics via `crm_mon` (pacemaker data xml)
1013

11-
- expose corosync metrics **not done yet WIP**
14+
- expose corosync metrics (ring errors, quorum metrics)
1215

1316
# Devel:
1417

corosync_metrics.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"log"
6+
"os/exec"
7+
"regexp"
8+
"strconv"
9+
"strings"
10+
)
11+
12+
// Quorum metrics
13+
14+
// getQuoromClusterInfo runs /usr/sbin/corosync-quorumtool and returns whatever
// the tool wrote to standard output, unparsed.
//
// A non-zero exit status is deliberately not treated as a failure: the
// captured output is returned as-is and it is up to the parser to make sense
// of it. An error that is not an exit status (for example the binary is
// missing or cannot be started) is logged, and the returned slice is empty.
func getQuoromClusterInfo() []byte {
	log.Println("[INFO]: Reading quorum status with corosync-quorumtool...")
	quorumInfoRaw, err := exec.Command("/usr/sbin/corosync-quorumtool").Output()
	if err != nil {
		// *exec.ExitError only means the tool ran and exited non-zero; any
		// other error means it never ran, which would otherwise go unnoticed.
		if _, exited := err.(*exec.ExitError); !exited {
			log.Printf("[ERROR]: cannot run corosync-quorumtool: %v", err)
		}
	}
	return quorumInfoRaw
}
23+
24+
// parseQuoromStatus extracts the quorum figures from the raw output of
// corosync-quorumtool.
//
// The relevant part of the input looks like:
//
//	Quorate:          Yes
//
//	Votequorum information
//	----------------------
//	Expected votes:   2
//	Highest expected: 2
//	Total votes:      2
//	Quorum:           1
//
// It returns a map with the keys "expectedVotes", "highestExpected",
// "totalVotes" and "quorum", plus the lower-cased word that follows
// "Quorate:" (e.g. "yes").
//
// A label that is missing from the input, or that is not followed by a number,
// yields 0 for its key; a missing "Quorate:" label yields "". Empty or
// malformed tool output therefore never panics.
func parseQuoromStatus(quoromStatus []byte) (map[string]int, string) {
	quoromRaw := string(quoromStatus)
	numberOnly := regexp.MustCompile("[0-9]+")
	wordOnly := regexp.MustCompile("[a-zA-Z]+")

	// after returns everything following the first occurrence of label, or ""
	// when the label does not appear at all.
	after := func(label string) string {
		i := strings.Index(quoromRaw, label)
		if i < 0 {
			return ""
		}
		return quoromRaw[i+len(label):]
	}

	// votes converts the first run of digits found after label to an int.
	// The Atoi error is ignored on purpose: with no digits the value stays 0,
	// which is the documented fallback.
	votes := func(label string) int {
		n, _ := strconv.Atoi(numberOnly.FindString(after(label)))
		return n
	}

	quorate := strings.ToLower(wordOnly.FindString(after("Quorate:")))

	voteQuorumInfo := map[string]int{
		"expectedVotes":   votes("Expected votes:"),
		"highestExpected": votes("Highest expected:"),
		"totalVotes":      votes("Total votes:"),
		"quorum":          votes("Quorum:"),
	}

	return voteQuorumInfo, quorate
}
58+
59+
// RING metrics
60+
61+
// getCorosyncRingStatus runs "/usr/sbin/corosync-cfgtool -s" and returns
// whatever the tool wrote to standard output, unparsed.
//
// The output may be malformed or empty when the command fails; this function
// does not validate it, that is the job of the parser.
//
// A non-zero exit status is not treated as a failure because corosync-cfgtool
// exits with 1 when any interface is faulty and with 0 when all interfaces are
// active. An error that is not an exit status (for example the binary is
// missing or cannot be started) is logged, and the returned slice is empty.
func getCorosyncRingStatus() []byte {
	log.Println("[INFO]: Reading ring status with corosync-cfgtool...")
	ringStatusRaw, err := exec.Command("/usr/sbin/corosync-cfgtool", "-s").Output()
	if err != nil {
		// *exec.ExitError only means the tool ran and exited non-zero, which
		// is expected with a faulty ring; any other error means it never ran.
		if _, exited := err.(*exec.ExitError); !exited {
			log.Printf("[ERROR]: cannot run corosync-cfgtool: %v", err)
		}
	}
	return ringStatusRaw
}
72+
73+
// parseRingStatus inspects the raw output of "corosync-cfgtool -s" and reports
// how many times the word FAULTY occurs in it; that count is meant to be
// exposed as a gauge.
//
// When nothing is FAULTY the output must at least contain "RING ID"; if it
// does not, the command is assumed to have gone wrong and an error carrying
// the raw output is returned together with a count of 0.
func parseRingStatus(ringStatus []byte) (int, error) {
	output := string(ringStatus)

	// Any FAULTY marker wins: report the count without further validation.
	if faulty := strings.Count(output, "FAULTY"); faulty > 0 {
		return faulty, nil
	}

	// No faults reported, so make sure this really is a ring status report.
	if !strings.Contains(output, "RING ID") {
		return 0, fmt.Errorf("[ERROR]: corosync-cfgtool command returned an unexpected error %s", output)
	}

	return 0, nil
}

corosync_metrics_test.go

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
)
7+
8+
// TEST group quorum metrics
9+
func TestQuoromMetricParsing(t *testing.T) {
10+
// the data is fake data
11+
fmt.Println("=== Testing quorum info retrivial")
12+
quoromStatus := `
13+
Quorum information
14+
------------------
15+
Date: Sun Sep 29 16:10:37 2019
16+
Quorum provider: corosync_votequorum
17+
Nodes: 2
18+
Node ID: 1084780051
19+
Ring ID: 1084780051/44
20+
Quorate: Yes
21+
22+
Votequorum information
23+
----------------------
24+
Expected votes: 232
25+
Highest expected: 22
26+
Total votes: 21
27+
Quorum: 421
28+
Flags: 2Node Quorate WaitForAll
29+
30+
Membership information
31+
----------------------
32+
Nodeid Votes Name
33+
1084780051 1 dma-dog-hana01 (local)
34+
1084780052 1 dma-dog-hana02
35+
dma-dog-hana01:~ #
36+
`
37+
getQuoromClusterInfo()
38+
voteQuorumInfo, quorate := parseQuoromStatus([]byte(quoromStatus))
39+
40+
if voteQuorumInfo["expectedVotes"] != 232 {
41+
t.Errorf("expectedVotes should be 232 got instead: %d", voteQuorumInfo["expectedVotes"])
42+
}
43+
if voteQuorumInfo["highestExpected"] != 22 {
44+
t.Errorf("expectedVotes should be 232 got instead: %d", voteQuorumInfo["highestExpected"])
45+
}
46+
47+
if voteQuorumInfo["totalVotes"] != 21 {
48+
t.Errorf("expectedVotes should be 232 got instead: %d", voteQuorumInfo["totalVotes"])
49+
}
50+
51+
if voteQuorumInfo["quorum"] != 421 {
52+
t.Errorf("expectedVotes should be 421 got instead: %d", voteQuorumInfo["quorum"])
53+
}
54+
55+
if quorate != "yes" {
56+
t.Errorf("quorate should be set to Yes, got %s", quorate)
57+
}
58+
59+
}
60+
61+
// TEST group RING metrics
62+
// test that we recognize 1 error (for increasing metric later)
63+
func TestOneRingError(t *testing.T) {
64+
fmt.Println("=== Test one ring error")
65+
ringStatusWithOneError := `Printing ring status.
66+
Local node ID 16777226
67+
RING ID 0
68+
id = 10.0.0.1
69+
status = Marking ringid 0 interface 10.0.0.1 FAULTY
70+
RING ID 1
71+
id = 172.16.0.1
72+
status = ring 1 active with no faults
73+
`
74+
75+
getCorosyncRingStatus()
76+
ringErrorsTotal, err := parseRingStatus([]byte(ringStatusWithOneError))
77+
RingExpectedErrors := 1
78+
if ringErrorsTotal != RingExpectedErrors {
79+
t.Errorf("ringErrors was incorrect, got: %d, expected: %d.", ringErrorsTotal, RingExpectedErrors)
80+
}
81+
if err != nil {
82+
t.Errorf("error should be nil got instead: %s", err)
83+
}
84+
}
85+
86+
func TestZeroRingErrors(t *testing.T) {
87+
fmt.Println("=== Test zero Ring errors")
88+
ringStatusWithOneError := `Printing ring status.
89+
Local node ID 16777226
90+
RING ID 0
91+
id = 10.0.0.1
92+
status = Marking ringid 0 interface 10.0.0.1
93+
RING ID 1
94+
id = 172.16.0.1
95+
status = ring 1 active with no faults
96+
`
97+
98+
getCorosyncRingStatus()
99+
ringErrorsTotal, err := parseRingStatus([]byte(ringStatusWithOneError))
100+
RingExpectedErrors := 0
101+
if ringErrorsTotal != RingExpectedErrors {
102+
t.Errorf("ringErrors was incorrect, got: %d, expected: %d.", ringErrorsTotal, RingExpectedErrors)
103+
}
104+
if err != nil {
105+
t.Errorf("error should be nil got instead: %s", err)
106+
}
107+
}
108+
109+
// test that we recognize 3 rings error (for increasing metric later)
110+
func TestMultipleRingErrors(t *testing.T) {
111+
fmt.Println("=== Test multiples ring error")
112+
ringStatusWithOneError := `Printing ring status.
113+
Local node ID 16777226
114+
RING ID 0
115+
id = 10.0.0.1
116+
status = Marking ringid 0 interface 10.0.0.1 FAULTY
117+
RING ID 1
118+
id = 172.16.0.1
119+
status = ring 1 active with no faults
120+
RING ID 2
121+
id = 10.0.0.1
122+
status = Marking ringid 1 interface 10.0.0.1 FAULTY
123+
RING ID 3
124+
id = 172.16.0.1
125+
status = ring 1 active with no faults
126+
RING ID 4
127+
id = 10.0.0.1
128+
status = Marking ringid 1 interface 10.0.0.1 FAULTY
129+
RING ID 5
130+
id = 172.16.0.1
131+
status = ring 1 active with no faults
132+
133+
`
134+
135+
getCorosyncRingStatus()
136+
ringErrorsTotal, err := parseRingStatus([]byte(ringStatusWithOneError))
137+
RingExpectedErrors := 3
138+
if ringErrorsTotal != RingExpectedErrors {
139+
t.Errorf("ringErrors was incorrect, got: %d, expected: %d.", ringErrorsTotal, RingExpectedErrors)
140+
}
141+
if err != nil {
142+
t.Errorf("error should be nil got instead: %s", err)
143+
}
144+
}
145+
146+
// test that in case of system unexpected error we detect this
147+
func TestSystemUnexpectedError(t *testing.T) {
148+
fmt.Println("=== Test unexpected error")
149+
// since there is no cluster in a Test env. this will return an error
150+
ringStatusError := getCorosyncRingStatus()
151+
parseRingStatus([]byte(ringStatusError))
152+
ringErrorsTotal, err := parseRingStatus([]byte(ringStatusError))
153+
RingExpectedErrors := 0
154+
if ringErrorsTotal != RingExpectedErrors {
155+
t.Errorf("ringErrors was incorrect, got: %d, expected: %d.", ringErrorsTotal, RingExpectedErrors)
156+
}
157+
if err == nil {
158+
t.Errorf("error should not be nil got !!")
159+
}
160+
161+
}

0 commit comments

Comments
 (0)