Skip to content

[YUNIKORN-2854] Add queue maxRunningApps metrics #1012

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions pkg/metrics/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ const (
ContainerAllocated = "allocated"
ContainerRejected = "rejected"

QueueGuaranteed = "guaranteed"
QueueMax = "max"
QueuePending = "pending"
QueuePreempting = "preempting"
QueueGuaranteed = "guaranteed"
QueueMax = "max"
QueuePending = "pending"
QueuePreempting = "preempting"
QueueMaxRunningApps = "maxRunningApps"
)

// QueueMetrics to declare queue metrics
Expand Down Expand Up @@ -99,15 +100,15 @@ func InitQueueMetrics(name string) *QueueMetrics {
Namespace: Namespace,
Name: "queue_resource",
ConstLabels: prometheus.Labels{"queue": name},
Help: "Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`.",
Help: "Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`, `maxRunningApps`.",
}, []string{"state", "resource"})

q.resourceMetricsSubsystem = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: replaceStr,
Name: "queue_resource",
Help: "Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`.",
Help: "Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`, `maxRunningApps`.",
}, []string{"state", "resource"})

var queueMetricsList = []prometheus.Collector{
Expand Down Expand Up @@ -354,3 +355,7 @@ func (m *QueueMetrics) SetQueuePendingResourceMetrics(resourceName string, value
// SetQueuePreemptingResourceMetrics forwards value for the given resourceName
// to setQueueResource, using the QueuePreempting ("preempting") state.
func (m *QueueMetrics) SetQueuePreemptingResourceMetrics(resourceName string, value float64) {
	const state = QueuePreempting
	m.setQueueResource(state, resourceName, value)
}

// SetQueueMaxRunningAppsMetrics forwards the max running applications value to
// setQueueResource, using the QueueMaxRunningApps ("maxRunningApps") state and
// the fixed resource name "apps".
func (m *QueueMetrics) SetQueueMaxRunningAppsMetrics(value uint64) {
	// setQueueResource is called with a float64 here, so convert up front.
	limit := float64(value)
	m.setQueueResource(QueueMaxRunningApps, "apps", limit)
}
8 changes: 8 additions & 0 deletions pkg/metrics/queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,14 @@ func TestQueuePreemptingResourceMetrics(t *testing.T) {
verifyResourceMetrics(t, "preempting", "cpu")
}

// TestQueueMaxRunningAppsResourceMetrics sets the max running apps metric on
// the test queue metrics and verifies it via verifyResourceMetrics with the
// "maxRunningApps" state and the "apps" resource.
func TestQueueMaxRunningAppsResourceMetrics(t *testing.T) {
	const (
		state    = "maxRunningApps"
		resource = "apps"
	)

	qm = getQueueMetrics()
	defer unregisterQueueMetrics()

	qm.SetQueueMaxRunningAppsMetrics(1)
	verifyResourceMetrics(t, state, resource)
}

func TestRemoveQueueMetrics(t *testing.T) {
testQueueName := "root.test"
qm = GetQueueMetrics(testQueueName)
Expand Down
8 changes: 8 additions & 0 deletions pkg/scheduler/objects/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ func NewConfiguredQueue(conf configs.QueueConfig, parent *Queue) (*Queue, error)
sq.parent = parent
sq.isManaged = true
sq.maxRunningApps = conf.MaxApplications
sq.updateMaxRunningAppsMetrics()

// update the properties
if err := sq.applyConf(conf); err != nil {
Expand Down Expand Up @@ -223,6 +224,7 @@ func (sq *Queue) applyTemplate(childTemplate *template.Template) {
// update metrics for guaranteed and max resource
sq.updateGuaranteedResourceMetrics()
sq.updateMaxResourceMetrics()
sq.updateMaxRunningAppsMetrics()
}

// getProperties returns a copy of the properties for this queue
Expand Down Expand Up @@ -366,6 +368,7 @@ func (sq *Queue) applyConf(conf configs.QueueConfig) error {
return err
}
sq.maxRunningApps = conf.MaxApplications
sq.updateMaxRunningAppsMetrics()
}

sq.properties = conf.Properties
Expand Down Expand Up @@ -462,6 +465,7 @@ func (sq *Queue) SetMaxRunningApps(maxApps uint64) {
sq.Lock()
defer sq.Unlock()
sq.maxRunningApps = maxApps
sq.updateMaxRunningAppsMetrics()
}

// setTemplate sets the template on the queue based on the config.
Expand Down Expand Up @@ -1705,6 +1709,10 @@ func (sq *Queue) updatePreemptingResourceMetrics() {
}
}

// updateMaxRunningAppsMetrics looks up the queue metrics for sq.QueuePath and
// sets the max running apps metric to the current sq.maxRunningApps value.
// It does not take the queue lock itself; SetMaxRunningApps calls it while
// holding the lock.
// NOTE(review): the metrics are looked up by the QueuePath value at call time;
// confirm QueuePath is already final at every call site.
func (sq *Queue) updateMaxRunningAppsMetrics() {
	queueMetrics := metrics.GetQueueMetrics(sq.QueuePath)
	queueMetrics.SetQueueMaxRunningAppsMetrics(sq.maxRunningApps)
}

// removeMetrics calls metrics.RemoveQueueMetrics for this queue's QueuePath.
func (sq *Queue) removeMetrics() {
	queuePath := sq.QueuePath
	metrics.RemoveQueueMetrics(queuePath)
}
Expand Down
8 changes: 7 additions & 1 deletion pkg/scheduler/objects/queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,11 @@ func TestPendingCalc(t *testing.T) {
want := concatQueueResourceMetric(metrics, []string{`
yunikorn_root_queue_resource{resource="memory",state="pending"} 100
yunikorn_root_queue_resource{resource="vcores",state="pending"} 10
yunikorn_root_queue_resource{resource="apps",state="maxRunningApps"} 0
`, `
yunikorn_root_leaf_queue_resource{resource="memory",state="pending"} 100
yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 10
yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
`},
)
assert.NilError(t, promtu.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(want), metrics...), "unexpected metrics")
Expand All @@ -314,9 +316,11 @@ yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 10
want = concatQueueResourceMetric(metrics, []string{`
yunikorn_root_queue_resource{resource="memory",state="pending"} 0
yunikorn_root_queue_resource{resource="vcores",state="pending"} 0
yunikorn_root_queue_resource{resource="apps",state="maxRunningApps"} 0
`, `
yunikorn_root_leaf_queue_resource{resource="memory",state="pending"} 0
yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 0
yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
`},
)
assert.NilError(t, promtu.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(want), metrics...), "unexpected metrics")
Expand All @@ -334,16 +338,18 @@ yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 0
want = concatQueueResourceMetric(metrics, []string{`
yunikorn_root_queue_resource{resource="memory",state="pending"} 0
yunikorn_root_queue_resource{resource="vcores",state="pending"} 0
yunikorn_root_queue_resource{resource="apps",state="maxRunningApps"} 0
`, `
yunikorn_root_leaf_queue_resource{resource="memory",state="pending"} 0
yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 0
yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
`},
)
assert.NilError(t, promtu.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(want), metrics...), "unexpected metrics")
}

const (
QueueResourceMetricHelp = "# HELP %v Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`."
QueueResourceMetricHelp = "# HELP %v Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`, `maxRunningApps`."
QueueResourceMetricType = "# TYPE %v gauge"
)

Expand Down
Loading