Skip to content

Commit b5eb4fd

Browse files
author
Stefano Torresi
authored
Merge pull request #151 from stefanotorresi/feature/corosync-new-features
Corosync new features
2 parents 2a5404a + c8d4e1a commit b5eb4fd

File tree

7 files changed

+525
-221
lines changed

7 files changed

+525
-221
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ test: download
4848
go test -v ./...
4949

5050
coverage:
51-
@mkdir build
51+
@mkdir -p build
5252
go test -cover -coverprofile=build/coverage ./...
5353
go tool cover -html=build/coverage
5454

collector/corosync/corosync.go

Lines changed: 45 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
11
package corosync
22

33
import (
4-
"fmt"
54
"os/exec"
6-
"regexp"
7-
"strconv"
8-
"strings"
95

106
"github.com/pkg/errors"
117
"github.com/prometheus/client_golang/prometheus"
@@ -24,9 +20,12 @@ func NewCollector(cfgToolPath string, quorumToolPath string) (*corosyncCollector
2420
collector.NewDefaultCollector("corosync"),
2521
cfgToolPath,
2622
quorumToolPath,
23+
NewParser(),
2724
}
2825
c.SetDescriptor("quorate", "Whether or not the cluster is quorate", nil)
29-
c.SetDescriptor("ring_errors", "The number of corosync ring errors", nil)
26+
c.SetDescriptor("rings", "The status of each Corosync ring; 1 means healthy, 0 means faulty.", []string{"ring_id", "node_id", "number", "address"})
27+
c.SetDescriptor("ring_errors", "The total number of faulty corosync rings", nil)
28+
c.SetDescriptor("member_votes", "How many votes each member node has contributed with to the current quorum", []string{"node_id", "node", "local"})
3029
c.SetDescriptor("quorum_votes", "Cluster quorum votes; one line per type", []string{"type"})
3130

3231
return c, nil
@@ -36,129 +35,70 @@ type corosyncCollector struct {
3635
collector.DefaultCollector
3736
cfgToolPath string
3837
quorumToolPath string
38+
cfgToolParser Parser
3939
}
4040

4141
func (c *corosyncCollector) Collect(ch chan<- prometheus.Metric) {
4242
log.Debugln("Collecting corosync metrics...")
4343

44-
err := c.collectRingErrorsTotal(ch)
45-
if err != nil {
46-
log.Warnf("Corosync Collector scrape failed: %s", err)
47-
}
44+
// We suppress the exec errors because if any interface is faulty the tools will exit with code 1, but we still want to parse the output.
45+
cfgToolOutput, _ := exec.Command(c.cfgToolPath, "-s").Output()
46+
quorumToolOutput, _ := exec.Command(c.quorumToolPath).Output()
4847

49-
quorumStatusRaw := c.getQuoromStatus()
50-
quorumStatus, quorate, err := parseQuoromStatus(quorumStatusRaw)
48+
status, err := c.cfgToolParser.Parse(cfgToolOutput, quorumToolOutput)
5149
if err != nil {
5250
log.Warnf("Corosync Collector scrape failed: %s", err)
5351
return
5452
}
5553

56-
ch <- c.MakeGaugeMetric("quorate", quorate)
57-
58-
for voteType, value := range quorumStatus {
59-
ch <- c.MakeGaugeMetric("quorum_votes", float64(value), voteType)
60-
}
61-
}
62-
63-
func (c *corosyncCollector) collectRingErrorsTotal(ch chan<- prometheus.Metric) error {
64-
ringStatus := c.getCorosyncRingStatus()
65-
ringErrorsTotal, err := parseRingStatus(ringStatus)
66-
if err != nil {
67-
return errors.Wrap(err, "cannot parse ring status")
68-
}
69-
70-
ch <- c.MakeGaugeMetric("ring_errors", float64(ringErrorsTotal))
71-
72-
return nil
54+
c.collectRings(status, ch)
55+
c.collectRingErrors(status, ch)
56+
c.collectQuorate(status, ch)
57+
c.collectQuorumVotes(status, ch)
58+
c.collectMemberVotes(status, ch)
7359
}
7460

75-
func (c *corosyncCollector) getQuoromStatus() []byte {
76-
// We suppress the exec error because if any interface is faulty, the tool will exit with code 1.
77-
// If all interfaces are active, exit code will be 0.
78-
quorumInfoRaw, _ := exec.Command(c.quorumToolPath).Output()
79-
return quorumInfoRaw
61+
func (c *corosyncCollector) collectQuorumVotes(status *Status, ch chan<- prometheus.Metric) {
62+
ch <- c.MakeGaugeMetric("quorum_votes", float64(status.QuorumVotes.ExpectedVotes), "expected_votes")
63+
ch <- c.MakeGaugeMetric("quorum_votes", float64(status.QuorumVotes.HighestExpected), "highest_expected")
64+
ch <- c.MakeGaugeMetric("quorum_votes", float64(status.QuorumVotes.TotalVotes), "total_votes")
65+
ch <- c.MakeGaugeMetric("quorum_votes", float64(status.QuorumVotes.Quorum), "quorum")
8066
}
8167

82-
func parseQuoromStatus(quoromStatusRaw []byte) (quorumVotes map[string]int, quorate float64, err error) {
83-
quoromRaw := string(quoromStatusRaw)
84-
// Quorate: Yes
85-
86-
// Votequorum information
87-
// ----------------------
88-
// Expected votes: 2
89-
// Highest expected: 2
90-
// Total votes: 2
91-
// Quorum: 1
92-
93-
// We apply the same method for all the metrics/data:
94-
// first split the string for finding the word , e.g "Expected votes:", and get it via regex
95-
// only the number 2,
96-
// and convert it to integer type
97-
numberOnly := regexp.MustCompile("[0-9]+")
98-
wordOnly := regexp.MustCompile("[a-zA-Z]+")
99-
quoratePresent := regexp.MustCompile("Quorate:")
100-
101-
// In case of error, the binary is there but execution was erroring out, check output for quorate string.
102-
quorateWordPresent := quoratePresent.FindString(string(quoromRaw))
103-
104-
// check the case there is an sbd_config but the SBD_DEVICE is not set
105-
106-
if quorateWordPresent == "" {
107-
return nil, quorate, errors.New("cannot parse quorum status")
108-
}
109-
110-
quorateRaw := wordOnly.FindString(strings.SplitAfterN(quoromRaw, "Quorate:", 2)[1])
111-
quorateString := strings.ToLower(quorateRaw)
112-
113-
if quorateString == "yes" {
68+
func (c *corosyncCollector) collectQuorate(status *Status, ch chan<- prometheus.Metric) {
69+
var quorate float64
70+
if status.Quorate {
11471
quorate = 1
11572
}
73+
ch <- c.MakeGaugeMetric("quorate", quorate)
74+
}
11675

117-
expVotes, _ := strconv.Atoi(numberOnly.FindString(strings.SplitAfterN(quoromRaw, "Expected votes:", 2)[1]))
118-
highVotes, _ := strconv.Atoi(numberOnly.FindString(strings.SplitAfterN(quoromRaw, "Highest expected:", 2)[1]))
119-
totalVotes, _ := strconv.Atoi(numberOnly.FindString(strings.SplitAfterN(quoromRaw, "Total votes:", 2)[1]))
120-
quorum, _ := strconv.Atoi(numberOnly.FindString(strings.SplitAfterN(quoromRaw, "Quorum:", 2)[1]))
121-
122-
quorumVotes = map[string]int{
123-
"expected_votes": expVotes,
124-
"highest_expected": highVotes,
125-
"total_votes": totalVotes,
126-
"quorum": quorum,
127-
}
128-
129-
if len(quorumVotes) == 0 {
130-
return quorumVotes, quorate, fmt.Errorf("could not retrieve any quorum information")
76+
func (c *corosyncCollector) collectRingErrors(status *Status, ch chan<- prometheus.Metric) {
77+
var numErrors float64
78+
for _, ring := range status.Rings {
79+
if ring.Faulty {
80+
numErrors += 1
81+
}
13182
}
132-
133-
return quorumVotes, quorate, nil
83+
ch <- c.MakeGaugeMetric("ring_errors", numErrors)
13484
}
13585

136-
// get status ring and return it as bytes
137-
// this function can return also just an malformed output in case of error, we don't check.
138-
// It is the parser that will check the status
139-
func (c *corosyncCollector) getCorosyncRingStatus() []byte {
140-
// We suppress the exec error because if any interface is faulty, the tool will exit with code 1.
141-
// If all interfaces are active/without error, exit code will be 0.
142-
ringStatusRaw, _ := exec.Command(c.cfgToolPath, "-s").Output()
143-
return ringStatusRaw
86+
func (c *corosyncCollector) collectRings(status *Status, ch chan<- prometheus.Metric) {
87+
for _, ring := range status.Rings {
88+
var healthy float64 = 1
89+
if ring.Faulty {
90+
healthy = 0
91+
}
92+
ch <- c.MakeGaugeMetric("rings", healthy, status.RingId, status.NodeId, ring.Number, ring.Address)
93+
}
14494
}
14595

146-
// return the number of RingError that we will use as gauge, and error if somethings unexpected happens
147-
func parseRingStatus(ringStatus []byte) (int, error) {
148-
statusRaw := string(ringStatus)
149-
// check if there is a ring ERROR first
150-
ringErrorsTotal := strings.Count(statusRaw, "FAULTY")
151-
152-
// in case there is no error we need to check that the output is not
153-
if ringErrorsTotal == 0 {
154-
// if there is no RING ID word, the command corosync-cfgtool went wrong/error out
155-
if strings.Count(statusRaw, "RING ID") == 0 {
156-
return 0, fmt.Errorf("corosync-cfgtool returned unexpected output: %s", statusRaw)
96+
func (c *corosyncCollector) collectMemberVotes(status *Status, ch chan<- prometheus.Metric) {
97+
for _, member := range status.Members {
98+
local := "false"
99+
if member.Local {
100+
local = "true"
157101
}
158-
159-
return 0, nil
102+
ch <- c.MakeGaugeMetric("member_votes", float64(member.Votes), member.Id, member.Name, local)
160103
}
161-
162-
// there is a ringError
163-
return ringErrorsTotal, nil
164104
}

collector/corosync/corosync_test.go

Lines changed: 0 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -8,116 +8,6 @@ import (
88
assertcustom "github.com/ClusterLabs/ha_cluster_exporter/internal/assert"
99
)
1010

11-
// TEST group quorum metrics
12-
func TestQuoromMetricParsing(t *testing.T) {
13-
// the data is fake
14-
quoromStatus := `
15-
Quorum information
16-
------------------
17-
Date: Sun Sep 29 16:10:37 2019
18-
Quorum provider: corosync_votequorum
19-
Nodes: 2
20-
Node ID: 1084780051
21-
Ring ID: 1084780051/44
22-
Quorate: Yes
23-
24-
Votequorum information
25-
----------------------
26-
Expected votes: 232
27-
Highest expected: 22
28-
Total votes: 21
29-
Quorum: 421
30-
Flags: 2Node Quorate WaitForAll
31-
32-
Membership information
33-
----------------------
34-
Nodeid Votes Name
35-
1084780051 1 dma-dog-hana01 (local)
36-
1084780052 1 dma-dog-hana02
37-
`
38-
voteQuorumInfo, quorate, _ := parseQuoromStatus([]byte(quoromStatus))
39-
40-
assert.Equal(t, 232, voteQuorumInfo["expected_votes"])
41-
assert.Equal(t, 22, voteQuorumInfo["highest_expected"])
42-
assert.Equal(t, 21, voteQuorumInfo["total_votes"])
43-
assert.Equal(t, 421, voteQuorumInfo["quorum"])
44-
assert.Equal(t, 1.0, quorate)
45-
}
46-
47-
// TEST group RING metrics
48-
// test that we recognize 1 error (for increasing metric later)
49-
func TestOneRingError(t *testing.T) {
50-
ringStatusWithOneError := `Printing ring status.
51-
Local node ID 16777226
52-
RING ID 0
53-
id = 10.0.0.1
54-
status = Marking ringid 0 interface 10.0.0.1 FAULTY
55-
RING ID 1
56-
id = 172.16.0.1
57-
status = ring 1 active with no faults
58-
`
59-
60-
ringErrorsTotal, err := parseRingStatus([]byte(ringStatusWithOneError))
61-
62-
assert.Nil(t, err)
63-
assert.Equal(t, 1, ringErrorsTotal)
64-
}
65-
66-
func TestZeroRingErrors(t *testing.T) {
67-
ringStatusWithOneError := `Printing ring status.
68-
Local node ID 16777226
69-
RING ID 0
70-
id = 10.0.0.1
71-
status = Marking ringid 0 interface 10.0.0.1
72-
RING ID 1
73-
id = 172.16.0.1
74-
status = ring 1 active with no faults
75-
`
76-
77-
ringErrorsTotal, err := parseRingStatus([]byte(ringStatusWithOneError))
78-
79-
assert.Nil(t, err)
80-
assert.Equal(t, 0, ringErrorsTotal)
81-
}
82-
83-
// test that we recognize 3 rings error (for increasing metric later)
84-
func TestMultipleRingErrors(t *testing.T) {
85-
ringStatusWithOneError := `Printing ring status.
86-
Local node ID 16777226
87-
RING ID 0
88-
id = 10.0.0.1
89-
status = Marking ringid 0 interface 10.0.0.1 FAULTY
90-
RING ID 1
91-
id = 172.16.0.1
92-
status = ring 1 active with no faults
93-
RING ID 2
94-
id = 10.0.0.1
95-
status = Marking ringid 1 interface 10.0.0.1 FAULTY
96-
RING ID 3
97-
id = 172.16.0.1
98-
status = ring 1 active with no faults
99-
RING ID 4
100-
id = 10.0.0.1
101-
status = Marking ringid 1 interface 10.0.0.1 FAULTY
102-
RING ID 5
103-
id = 172.16.0.1
104-
status = ring 1 active with no faults
105-
106-
`
107-
108-
ringErrorsTotal, err := parseRingStatus([]byte(ringStatusWithOneError))
109-
110-
assert.Nil(t, err)
111-
assert.Equal(t, 3, ringErrorsTotal)
112-
}
113-
114-
func TestRingStatusParsingError(t *testing.T) {
115-
_, err := parseRingStatus([]byte("some error occurred"))
116-
117-
assert.Error(t, err)
118-
assert.Contains(t, err.Error(), "some error occurred")
119-
}
120-
12111
func TestNewCorosyncCollector(t *testing.T) {
12212
_, err := NewCollector("../../test/fake_corosync-cfgtool.sh", "../../test/fake_corosync-quorumtool.sh")
12313
assert.Nil(t, err)

0 commit comments

Comments
 (0)