11package corosync
22
33import (
4- "fmt"
54 "os/exec"
6- "regexp"
7- "strconv"
8- "strings"
95
106 "github.com/pkg/errors"
117 "github.com/prometheus/client_golang/prometheus"
@@ -24,9 +20,12 @@ func NewCollector(cfgToolPath string, quorumToolPath string) (*corosyncCollector
2420 collector .NewDefaultCollector ("corosync" ),
2521 cfgToolPath ,
2622 quorumToolPath ,
23+ NewParser (),
2724 }
2825 c .SetDescriptor ("quorate" , "Whether or not the cluster is quorate" , nil )
29- c .SetDescriptor ("ring_errors" , "The number of corosync ring errors" , nil )
26+ c .SetDescriptor ("rings" , "The status of each Corosync ring; 1 means healthy, 0 means faulty." , []string {"ring_id" , "node_id" , "number" , "address" })
27+ c .SetDescriptor ("ring_errors" , "The total number of faulty corosync rings" , nil )
28+ c .SetDescriptor ("member_votes" , "How many votes each member node has contributed with to the current quorum" , []string {"node_id" , "node" , "local" })
3029 c .SetDescriptor ("quorum_votes" , "Cluster quorum votes; one line per type" , []string {"type" })
3130
3231 return c , nil
@@ -36,129 +35,70 @@ type corosyncCollector struct {
3635 collector.DefaultCollector
3736 cfgToolPath string
3837 quorumToolPath string
38+ cfgToolParser Parser
3939}
4040
4141func (c * corosyncCollector ) Collect (ch chan <- prometheus.Metric ) {
4242 log .Debugln ("Collecting corosync metrics..." )
4343
44- err := c .collectRingErrorsTotal (ch )
45- if err != nil {
46- log .Warnf ("Corosync Collector scrape failed: %s" , err )
47- }
44+ // We suppress the exec errors because if any interface is faulty the tools will exit with code 1, but we still want to parse the output.
45+ cfgToolOutput , _ := exec .Command (c .cfgToolPath , "-s" ).Output ()
46+ quorumToolOutput , _ := exec .Command (c .quorumToolPath ).Output ()
4847
49- quorumStatusRaw := c .getQuoromStatus ()
50- quorumStatus , quorate , err := parseQuoromStatus (quorumStatusRaw )
48+ status , err := c .cfgToolParser .Parse (cfgToolOutput , quorumToolOutput )
5149 if err != nil {
5250 log .Warnf ("Corosync Collector scrape failed: %s" , err )
5351 return
5452 }
5553
56- ch <- c .MakeGaugeMetric ("quorate" , quorate )
57-
58- for voteType , value := range quorumStatus {
59- ch <- c .MakeGaugeMetric ("quorum_votes" , float64 (value ), voteType )
60- }
61- }
62-
63- func (c * corosyncCollector ) collectRingErrorsTotal (ch chan <- prometheus.Metric ) error {
64- ringStatus := c .getCorosyncRingStatus ()
65- ringErrorsTotal , err := parseRingStatus (ringStatus )
66- if err != nil {
67- return errors .Wrap (err , "cannot parse ring status" )
68- }
69-
70- ch <- c .MakeGaugeMetric ("ring_errors" , float64 (ringErrorsTotal ))
71-
72- return nil
54+ c .collectRings (status , ch )
55+ c .collectRingErrors (status , ch )
56+ c .collectQuorate (status , ch )
57+ c .collectQuorumVotes (status , ch )
58+ c .collectMemberVotes (status , ch )
7359}
7460
75- func (c * corosyncCollector ) getQuoromStatus () [] byte {
76- // We suppress the exec error because if any interface is faulty, the tool will exit with code 1.
77- // If all interfaces are active, exit code will be 0.
78- quorumInfoRaw , _ := exec . Command ( c . quorumToolPath ). Output ( )
79- return quorumInfoRaw
61+ func (c * corosyncCollector ) collectQuorumVotes ( status * Status , ch chan <- prometheus. Metric ) {
62+ ch <- c . MakeGaugeMetric ( "quorum_votes" , float64 ( status . QuorumVotes . ExpectedVotes ), "expected_votes" )
63+ ch <- c . MakeGaugeMetric ( "quorum_votes" , float64 ( status . QuorumVotes . HighestExpected ), "highest_expected" )
64+ ch <- c . MakeGaugeMetric ( "quorum_votes" , float64 ( status . QuorumVotes . TotalVotes ), "total_votes" )
65+ ch <- c . MakeGaugeMetric ( "quorum_votes" , float64 ( status . QuorumVotes . Quorum ), "quorum" )
8066}
8167
82- func parseQuoromStatus (quoromStatusRaw []byte ) (quorumVotes map [string ]int , quorate float64 , err error ) {
83- quoromRaw := string (quoromStatusRaw )
84- // Quorate: Yes
85-
86- // Votequorum information
87- // ----------------------
88- // Expected votes: 2
89- // Highest expected: 2
90- // Total votes: 2
91- // Quorum: 1
92-
93- // We apply the same method for all the metrics/data:
94- // first split the string for finding the word , e.g "Expected votes:", and get it via regex
95- // only the number 2,
96- // and convert it to integer type
97- numberOnly := regexp .MustCompile ("[0-9]+" )
98- wordOnly := regexp .MustCompile ("[a-zA-Z]+" )
99- quoratePresent := regexp .MustCompile ("Quorate:" )
100-
101- // In case of error, the binary is there but execution was erroring out, check output for quorate string.
102- quorateWordPresent := quoratePresent .FindString (string (quoromRaw ))
103-
104- // check the case there is an sbd_config but the SBD_DEVICE is not set
105-
106- if quorateWordPresent == "" {
107- return nil , quorate , errors .New ("cannot parse quorum status" )
108- }
109-
110- quorateRaw := wordOnly .FindString (strings .SplitAfterN (quoromRaw , "Quorate:" , 2 )[1 ])
111- quorateString := strings .ToLower (quorateRaw )
112-
113- if quorateString == "yes" {
68+ func (c * corosyncCollector ) collectQuorate (status * Status , ch chan <- prometheus.Metric ) {
69+ var quorate float64
70+ if status .Quorate {
11471 quorate = 1
11572 }
73+ ch <- c .MakeGaugeMetric ("quorate" , quorate )
74+ }
11675
117- expVotes , _ := strconv .Atoi (numberOnly .FindString (strings .SplitAfterN (quoromRaw , "Expected votes:" , 2 )[1 ]))
118- highVotes , _ := strconv .Atoi (numberOnly .FindString (strings .SplitAfterN (quoromRaw , "Highest expected:" , 2 )[1 ]))
119- totalVotes , _ := strconv .Atoi (numberOnly .FindString (strings .SplitAfterN (quoromRaw , "Total votes:" , 2 )[1 ]))
120- quorum , _ := strconv .Atoi (numberOnly .FindString (strings .SplitAfterN (quoromRaw , "Quorum:" , 2 )[1 ]))
121-
122- quorumVotes = map [string ]int {
123- "expected_votes" : expVotes ,
124- "highest_expected" : highVotes ,
125- "total_votes" : totalVotes ,
126- "quorum" : quorum ,
127- }
128-
129- if len (quorumVotes ) == 0 {
130- return quorumVotes , quorate , fmt .Errorf ("could not retrieve any quorum information" )
76+ func (c * corosyncCollector ) collectRingErrors (status * Status , ch chan <- prometheus.Metric ) {
77+ var numErrors float64
78+ for _ , ring := range status .Rings {
79+ if ring .Faulty {
80+ numErrors += 1
81+ }
13182 }
132-
133- return quorumVotes , quorate , nil
83+ ch <- c .MakeGaugeMetric ("ring_errors" , numErrors )
13484}
13585
136- // get status ring and return it as bytes
137- // this function can return also just an malformed output in case of error, we don't check.
138- // It is the parser that will check the status
139- func ( c * corosyncCollector ) getCorosyncRingStatus () [] byte {
140- // We suppress the exec error because if any interface is faulty, the tool will exit with code 1.
141- // If all interfaces are active/without error, exit code will be 0.
142- ringStatusRaw , _ := exec . Command ( c . cfgToolPath , "-s" ). Output ( )
143- return ringStatusRaw
86+ func ( c * corosyncCollector ) collectRings ( status * Status , ch chan <- prometheus. Metric ) {
87+ for _ , ring := range status . Rings {
88+ var healthy float64 = 1
89+ if ring . Faulty {
90+ healthy = 0
91+ }
92+ ch <- c . MakeGaugeMetric ( "rings" , healthy , status . RingId , status . NodeId , ring . Number , ring . Address )
93+ }
14494}
14595
146- // return the number of RingError that we will use as gauge, and error if somethings unexpected happens
147- func parseRingStatus (ringStatus []byte ) (int , error ) {
148- statusRaw := string (ringStatus )
149- // check if there is a ring ERROR first
150- ringErrorsTotal := strings .Count (statusRaw , "FAULTY" )
151-
152- // in case there is no error we need to check that the output is not
153- if ringErrorsTotal == 0 {
154- // if there is no RING ID word, the command corosync-cfgtool went wrong/error out
155- if strings .Count (statusRaw , "RING ID" ) == 0 {
156- return 0 , fmt .Errorf ("corosync-cfgtool returned unexpected output: %s" , statusRaw )
96+ func (c * corosyncCollector ) collectMemberVotes (status * Status , ch chan <- prometheus.Metric ) {
97+ for _ , member := range status .Members {
98+ local := "false"
99+ if member .Local {
100+ local = "true"
157101 }
158-
159- return 0 , nil
102+ ch <- c .MakeGaugeMetric ("member_votes" , float64 (member .Votes ), member .Id , member .Name , local )
160103 }
161-
162- // there is a ringError
163- return ringErrorsTotal , nil
164104}
0 commit comments