Skip to content

Commit 4253d03

Browse files
committed
Implement a mechanism for updating the metrics from the cluster every 2 seconds.
The labelled metric still needs some adaptation to replace the mocked number.
1 parent 9e72f83 commit 4253d03

File tree

1 file changed

+86
-60
lines changed

1 file changed

+86
-60
lines changed

main.go

Lines changed: 86 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"net/http"
99
"os/exec"
1010
"sort"
11+
"time"
1112

1213
"github.com/prometheus/client_golang/prometheus"
1314
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -308,10 +309,10 @@ var (
308309
prometheus.GaugeOpts{
309310
Name: "cluster_resources",
310311
Help: "number of cluster resources",
311-
}, []string{"role"})
312+
}, []string{"node", "resource_name", "role"})
312313
)
313314

314-
func init() {
315+
func initMetrics() {
315316
// Metrics have to be registered to be exposed:
316317
prometheus.MustRegister(clusterNodesConf)
317318
prometheus.MustRegister(clusterNodesOnline)
@@ -343,69 +344,94 @@ func init() {
343344
// portNumber is the value of the -port flag. It is handed unchanged to
// http.ListenAndServe, so despite its name it is a listen address and
// carries the leading colon (default ":9001").
var portNumber = flag.String("port", ":9001", "The port number to listen on for HTTP requests.")
344345

345346
func main() {
347+
// read cli option and setup initial stat
346348
flag.Parse()
347-
// get cluster status xml
348-
monxml, err := exec.Command("/usr/sbin/crm_mon", "-1", "--as-xml", "--group-by-node", "--inactive").Output()
349-
if err != nil {
350-
fmt.Println("[ERROR]: crm_mon command was not executed correctly. Did you have crm_mon installed ?")
351-
panic(err)
352-
}
349+
initMetrics()
350+
http.Handle("/metrics", promhttp.Handler())
353351

354-
var status crmMon
355-
err = xml.Unmarshal(monxml, &status)
356-
if err != nil {
357-
panic(err)
358-
}
352+
// parse each 2 seconds the cluster configuration and update the metrics accordingly
353+
// this is done in a goroutine async. we update in this way each 2 second the metrics. (the second will be a parameter in future)
354+
go func() {
359355

360-
metrics := parseGenericMetrics(&status)
361-
// add genric node metrics
362-
clusterNodesConf.Set(float64(metrics.Node.Configured))
363-
clusterNodesOnline.Set(float64(metrics.Node.Online))
364-
clusterNodesStandby.Set(float64(metrics.Node.Standby))
365-
clusterNodesStandbyOnFail.Set(float64(metrics.Node.StandbyOnFail))
366-
clusterNodesMaintenance.Set(float64(metrics.Node.Maintenance))
367-
clusterNodesPending.Set(float64(metrics.Node.Pending))
368-
clusterNodesUnclean.Set(float64(metrics.Node.Unclean))
369-
clusterNodesShutdown.Set(float64(metrics.Node.Shutdown))
370-
clusterNodesExpectedUp.Set(float64(metrics.Node.ExpectedUp))
371-
clusterNodesDC.Set(float64(metrics.Node.DC))
372-
// add genric resource metrics
373-
clusterResourcesUnique.Set(float64(metrics.Resource.Unique))
374-
clusterResourcesDisabled.Set(float64(metrics.Resource.Disabled))
375-
clusterResourcesConf.Set(float64(metrics.Resource.Configured))
376-
clusterResourcesActive.Set(float64(metrics.Resource.Active))
377-
clusterResourcesOrphaned.Set(float64(metrics.Resource.Orphaned))
378-
clusterResourcesBlocked.Set(float64(metrics.Resource.Blocked))
379-
clusterResourcesManaged.Set(float64(metrics.Resource.Managed))
380-
clusterResourcesFailed.Set(float64(metrics.Resource.Failed))
381-
clusterResourcesFailedIgnored.Set(float64(metrics.Resource.FailureIgnored))
356+
for {
382357

383-
// metrics with labels
384-
clusterNodes.WithLabelValues("member").Add(float64(metrics.Node.TypeMember))
385-
clusterNodes.WithLabelValues("ping").Add(float64(metrics.Node.TypePing))
386-
clusterNodes.WithLabelValues("remote").Add(float64(metrics.Node.TypeRemote))
387-
clusterNodes.WithLabelValues("unknown").Add(float64(metrics.Node.TypeUnknown))
388-
389-
clusterNodes.WithLabelValues("stopped").Add(float64(metrics.Resource.Stopped))
390-
clusterNodes.WithLabelValues("started").Add(float64(metrics.Resource.Started))
391-
clusterNodes.WithLabelValues("slave").Add(float64(metrics.Resource.Slave))
392-
clusterNodes.WithLabelValues("master").Add(float64(metrics.Resource.Master))
393-
394-
// TODO: this is historically, we might don't need to do like this. investigate on this later
395-
keys := make([]string, len(metrics.PerNode))
396-
i := 0
397-
for k := range metrics.PerNode {
398-
keys[i] = k
399-
i++
400-
}
401-
sort.Strings(keys)
402-
for _, k := range keys {
403-
node := metrics.PerNode[k]
404-
clusterResourcesRunning.WithLabelValues(k).Add(float64(node.ResourcesRunning))
405-
}
358+
var status crmMon
359+
// get cluster status xml
360+
fmt.Println("[INFO]: Reading cluster configuration with crm_mon..")
361+
monxml, err := exec.Command("/usr/sbin/crm_mon", "-1", "--as-xml", "--group-by-node", "--inactive").Output()
362+
if err != nil {
363+
fmt.Println("[ERROR]: crm_mon command was not executed correctly. Did you have crm_mon installed ?")
364+
panic(err)
365+
}
366+
367+
// read configuration
368+
err = xml.Unmarshal(monxml, &status)
369+
if err != nil {
370+
panic(err)
371+
}
372+
373+
metrics := parseGenericMetrics(&status)
374+
// add genric node metrics
375+
clusterNodesConf.Set(float64(metrics.Node.Configured))
376+
clusterNodesOnline.Set(float64(metrics.Node.Online))
377+
clusterNodesStandby.Set(float64(metrics.Node.Standby))
378+
clusterNodesStandbyOnFail.Set(float64(metrics.Node.StandbyOnFail))
379+
clusterNodesMaintenance.Set(float64(metrics.Node.Maintenance))
380+
clusterNodesPending.Set(float64(metrics.Node.Pending))
381+
clusterNodesUnclean.Set(float64(metrics.Node.Unclean))
382+
clusterNodesShutdown.Set(float64(metrics.Node.Shutdown))
383+
clusterNodesExpectedUp.Set(float64(metrics.Node.ExpectedUp))
384+
clusterNodesDC.Set(float64(metrics.Node.DC))
385+
// add genric resource metrics
386+
clusterResourcesUnique.Set(float64(metrics.Resource.Unique))
387+
clusterResourcesDisabled.Set(float64(metrics.Resource.Disabled))
388+
clusterResourcesConf.Set(float64(metrics.Resource.Configured))
389+
clusterResourcesActive.Set(float64(metrics.Resource.Active))
390+
clusterResourcesOrphaned.Set(float64(metrics.Resource.Orphaned))
391+
clusterResourcesBlocked.Set(float64(metrics.Resource.Blocked))
392+
clusterResourcesManaged.Set(float64(metrics.Resource.Managed))
393+
clusterResourcesFailed.Set(float64(metrics.Resource.Failed))
394+
clusterResourcesFailedIgnored.Set(float64(metrics.Resource.FailureIgnored))
395+
396+
// metrics with labels
397+
clusterNodes.WithLabelValues("member").Set(float64(metrics.Node.TypeMember))
398+
clusterNodes.WithLabelValues("ping").Set(float64(metrics.Node.TypePing))
399+
clusterNodes.WithLabelValues("remote").Set(float64(metrics.Node.TypeRemote))
400+
clusterNodes.WithLabelValues("unknown").Set(float64(metrics.Node.TypeUnknown))
401+
402+
// TODO: rename this metric with Total etc.
403+
// clusterResourcesTotal.WithLabelValues("stopped").Add(float64(metrics.Resource.Stopped))
404+
// clusterResources.WithLabelValues("started").Add(float64(metrics.Resource.Started))
405+
// clusterResources.WithLabelValues("slave").Add(float64(metrics.Resource.Slave))
406+
// clusterResources.WithLabelValues("master").Add(float64(metrics.Resource.Master))
407+
408+
// this will produce a metric like this:
409+
// cluster_resources{node="dma-dog-hana01" resource_name="RA1" role="master"} 1
410+
for _, nod := range status.Nodes.Node {
411+
for _, rsc := range nod.Resources {
412+
// TODO: FIXME FIND a mechanism to count the resources:
413+
clusterResources.WithLabelValues(nod.Name, rsc.ID, rsc.Role).Set(float64(1))
414+
}
415+
}
416+
417+
// TODO: this is historically, we might don't need to do like this. investigate on this later
418+
keys := make([]string, len(metrics.PerNode))
419+
i := 0
420+
for k := range metrics.PerNode {
421+
keys[i] = k
422+
i++
423+
}
424+
sort.Strings(keys)
425+
for _, k := range keys {
426+
node := metrics.PerNode[k]
427+
clusterResourcesRunning.WithLabelValues(k).Set(float64(node.ResourcesRunning))
428+
}
429+
// TODO: make this configurable later
430+
time.Sleep(2 * time.Second)
431+
432+
}
433+
}()
406434

407-
// serve metrics
408-
http.Handle("/metrics", promhttp.Handler())
409435
fmt.Println("[INFO]: Serving metrics on port", *portNumber)
410436
log.Fatal(http.ListenAndServe(*portNumber, nil))
411437
}

0 commit comments

Comments
 (0)