Skip to content

Commit 03d63f3

Browse files
authored
feat: Add prometheus metrics. (#540)
* ref: Move stats config to a separate file. * ref: Add an abstraction layer in front of statsd. * ref: Move statsd code under metrics, make private. * Add a prometheus interface at /metrics. * feat: Add "healthy" and "recording" metrics. * ref: Rename a function for clarity.
1 parent df5bfaa commit 03d63f3

File tree

12 files changed

+332
-142
lines changed

12 files changed

+332
-142
lines changed

pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,12 @@
270270
<version>1.2.1</version>
271271
<scope>test</scope>
272272
</dependency>
273+
<dependency>
274+
<groupId>org.jitsi</groupId>
275+
<artifactId>jicoco-metrics</artifactId>
276+
<version>1.1-133-g768ef2e</version>
277+
<scope>compile</scope>
278+
</dependency>
273279
</dependencies>
274280

275281
<build>

src/main/kotlin/org/jitsi/jibri/JibriManager.kt

Lines changed: 19 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package org.jitsi.jibri
2020
import org.jitsi.jibri.config.Config
2121
import org.jitsi.jibri.config.XmppCredentials
2222
import org.jitsi.jibri.health.EnvironmentContext
23+
import org.jitsi.jibri.metrics.JibriMetrics
2324
import org.jitsi.jibri.selenium.CallParams
2425
import org.jitsi.jibri.service.JibriService
2526
import org.jitsi.jibri.service.JibriServiceStatusHandler
@@ -30,14 +31,6 @@ import org.jitsi.jibri.service.impl.SipGatewayJibriService
3031
import org.jitsi.jibri.service.impl.SipGatewayServiceParams
3132
import org.jitsi.jibri.service.impl.StreamingJibriService
3233
import org.jitsi.jibri.service.impl.StreamingParams
33-
import org.jitsi.jibri.statsd.ASPECT_BUSY
34-
import org.jitsi.jibri.statsd.ASPECT_ERROR
35-
import org.jitsi.jibri.statsd.ASPECT_START
36-
import org.jitsi.jibri.statsd.ASPECT_STOP
37-
import org.jitsi.jibri.statsd.JibriStatsDClient
38-
import org.jitsi.jibri.statsd.TAG_SERVICE_LIVE_STREAM
39-
import org.jitsi.jibri.statsd.TAG_SERVICE_RECORDING
40-
import org.jitsi.jibri.statsd.TAG_SERVICE_SIP_GATEWAY
4134
import org.jitsi.jibri.status.ComponentBusyStatus
4235
import org.jitsi.jibri.status.ComponentHealthStatus
4336
import org.jitsi.jibri.status.ComponentState
@@ -99,40 +92,23 @@ class JibriManager : StatusPublisher<Any>() {
9992
private var pendingIdleFunc: () -> Unit = {}
10093
private var serviceTimeoutTask: ScheduledFuture<*>? = null
10194

102-
private val enableStatsD: Boolean by config {
103-
"JibriConfig::enableStatsD" { Config.legacyConfigSource.enabledStatsD!! }
104-
"jibri.stats.enable-stats-d".from(Config.configSource)
105-
}
106-
107-
private val statsdHost: String by config {
108-
"jibri.stats.host".from(Config.configSource)
109-
}
110-
111-
private val statsdPort: Int by config {
112-
"jibri.stats.port".from(Config.configSource)
113-
}
114-
11595
private val singleUseMode: Boolean by config {
11696
"JibriConfig::singleUseMode" { Config.legacyConfigSource.singleUseMode!! }
11797
"jibri.single-use-mode".from(Config.configSource)
11898
}
11999

120-
val statsDClient: JibriStatsDClient? = if (enableStatsD) {
121-
JibriStatsDClient(statsdHost, statsdPort)
122-
} else {
123-
null
124-
}
100+
val jibriMetrics = JibriMetrics()
125101

126102
/**
127103
* Note: should only be called if the instance-wide lock is held (i.e. called from
128104
* one of the synchronized methods)
129105
* TODO: instead of the synchronized decorators, use a synchronized(this) block
130106
* which we can also use here
131107
*/
132-
private fun throwIfBusy() {
108+
private fun throwIfBusy(sinkType: RecordingSinkType) {
133109
if (busy()) {
134110
logger.info("Jibri is busy, can't start service")
135-
statsDClient?.incrementCounter(ASPECT_BUSY, TAG_SERVICE_RECORDING)
111+
jibriMetrics.requestWhileBusy(sinkType)
136112
throw JibriBusyException()
137113
}
138114
}
@@ -148,7 +124,7 @@ class JibriManager : StatusPublisher<Any>() {
148124
environmentContext: EnvironmentContext? = null,
149125
serviceStatusHandler: JibriServiceStatusHandler? = null
150126
) {
151-
throwIfBusy()
127+
throwIfBusy(RecordingSinkType.FILE)
152128
logger.info("Starting a file recording with params: $fileRecordingRequestParams")
153129
val service = FileRecordingJibriService(
154130
FileRecordingParams(
@@ -158,7 +134,7 @@ class JibriManager : StatusPublisher<Any>() {
158134
serviceParams.appData?.fileRecordingMetadata
159135
)
160136
)
161-
statsDClient?.incrementCounter(ASPECT_START, TAG_SERVICE_RECORDING)
137+
jibriMetrics.start(RecordingSinkType.FILE)
162138
startService(service, serviceParams, environmentContext, serviceStatusHandler)
163139
}
164140

@@ -174,9 +150,9 @@ class JibriManager : StatusPublisher<Any>() {
174150
serviceStatusHandler: JibriServiceStatusHandler? = null
175151
) {
176152
logger.info("Starting a stream with params: $serviceParams $streamingParams")
177-
throwIfBusy()
153+
throwIfBusy(RecordingSinkType.STREAM)
178154
val service = StreamingJibriService(streamingParams)
179-
statsDClient?.incrementCounter(ASPECT_START, TAG_SERVICE_LIVE_STREAM)
155+
jibriMetrics.start(RecordingSinkType.STREAM)
180156
startService(service, serviceParams, environmentContext, serviceStatusHandler)
181157
}
182158

@@ -188,15 +164,15 @@ class JibriManager : StatusPublisher<Any>() {
188164
serviceStatusHandler: JibriServiceStatusHandler? = null
189165
) {
190166
logger.info("Starting a SIP gateway with params: $serviceParams $sipGatewayServiceParams")
191-
throwIfBusy()
167+
throwIfBusy(RecordingSinkType.GATEWAY)
192168
val service = SipGatewayJibriService(
193169
SipGatewayServiceParams(
194170
sipGatewayServiceParams.callParams,
195171
sipGatewayServiceParams.callLoginParams,
196172
sipGatewayServiceParams.sipClientParams
197173
)
198174
)
199-
statsDClient?.incrementCounter(ASPECT_START, TAG_SERVICE_SIP_GATEWAY)
175+
jibriMetrics.start(RecordingSinkType.GATEWAY)
200176
return startService(service, serviceParams, environmentContext, serviceStatusHandler)
201177
}
202178

@@ -219,7 +195,7 @@ class JibriManager : StatusPublisher<Any>() {
219195
when (it) {
220196
is ComponentState.Error -> {
221197
if (it.error.scope == ErrorScope.SYSTEM) {
222-
statsDClient?.incrementCounter(ASPECT_ERROR, JibriStatsDClient.getTagForService(jibriService))
198+
jibriMetrics.error(jibriService.getSinkType())
223199
publishStatus(ComponentHealthStatus.UNHEALTHY)
224200
}
225201
stopService()
@@ -270,7 +246,7 @@ class JibriManager : StatusPublisher<Any>() {
270246
logger.info("No service active, ignoring stop")
271247
return
272248
}
273-
statsDClient?.incrementCounter(ASPECT_STOP, JibriStatsDClient.getTagForService(currentService))
249+
jibriMetrics.stop(currentService.getSinkType())
274250
logger.info("Stopping the current service")
275251
serviceTimeoutTask?.cancel(false)
276252
// Note that this will block until the service is completely stopped
@@ -309,3 +285,10 @@ class JibriManager : StatusPublisher<Any>() {
309285
}
310286
}
311287
}
288+
289+
private fun JibriService.getSinkType() = when (this) {
290+
is FileRecordingJibriService -> RecordingSinkType.FILE
291+
is StreamingJibriService -> RecordingSinkType.GATEWAY
292+
is SipGatewayJibriService -> RecordingSinkType.GATEWAY
293+
else -> throw IllegalArgumentException("JibriService of unsupported type: ${JibriService::class.java.name}")
294+
}

src/main/kotlin/org/jitsi/jibri/Main.kt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ fun main(args: Array<String>) {
7878
jibriStatusManager.addStatusHandler {
7979
webhookClient.updateStatus(it)
8080
}
81+
jibriStatusManager.addStatusHandler {
82+
jibriManager.jibriMetrics.updateStatus(it)
83+
}
8184
webhookSubscribers.forEach(webhookClient::addSubscriber)
8285
val statusUpdaterTask = TaskPools.recurringTasksPool.scheduleAtFixedRate(
8386
1,
@@ -144,8 +147,7 @@ fun main(args: Array<String>) {
144147
val xmppApi = XmppApi(
145148
jibriManager = jibriManager,
146149
xmppConfigs = xmppEnvironments,
147-
jibriStatusManager = jibriStatusManager,
148-
jibriManager.statsDClient
150+
jibriStatusManager = jibriStatusManager
149151
)
150152
xmppApi.start()
151153

src/main/kotlin/org/jitsi/jibri/api/http/HttpApi.kt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package org.jitsi.jibri.api.http
1818

19+
import io.ktor.http.ContentType
1920
import io.ktor.http.HttpStatusCode
2021
import io.ktor.serialization.jackson.jackson
2122
import io.ktor.server.application.Application
@@ -24,10 +25,12 @@ import io.ktor.server.application.install
2425
import io.ktor.server.plugins.contentnegotiation.ContentNegotiation
2526
import io.ktor.server.request.receive
2627
import io.ktor.server.response.respond
28+
import io.ktor.server.response.respondText
2729
import io.ktor.server.routing.get
2830
import io.ktor.server.routing.post
2931
import io.ktor.server.routing.route
3032
import io.ktor.server.routing.routing
33+
import io.prometheus.client.exporter.common.TextFormat
3134
import jakarta.ws.rs.core.Response
3235
import org.jitsi.jibri.FileRecordingRequestParams
3336
import org.jitsi.jibri.JibriBusyException
@@ -36,6 +39,8 @@ import org.jitsi.jibri.RecordingSinkType
3639
import org.jitsi.jibri.config.Config
3740
import org.jitsi.jibri.config.XmppCredentials
3841
import org.jitsi.jibri.health.JibriHealth
42+
import org.jitsi.jibri.metrics.JibriMetricsContainer
43+
import org.jitsi.jibri.metrics.StatsConfig
3944
import org.jitsi.jibri.selenium.CallParams
4045
import org.jitsi.jibri.service.JibriServiceStatusHandler
4146
import org.jitsi.jibri.service.ServiceParams
@@ -130,6 +135,31 @@ class HttpApi(
130135
call.respond(HttpStatusCode.OK)
131136
}
132137
}
138+
if (StatsConfig.enablePrometheus) {
139+
logger.info("Enabling prometheus interface at :$port/metrics")
140+
get("/metrics") {
141+
val accept = call.request.headers["Accept"]
142+
when {
143+
accept?.startsWith("application/openmetrics-text") == true ->
144+
call.respondText(
145+
JibriMetricsContainer.getPrometheusMetrics(TextFormat.CONTENT_TYPE_OPENMETRICS_100),
146+
contentType = ContentType.parse(TextFormat.CONTENT_TYPE_OPENMETRICS_100)
147+
)
148+
149+
accept?.startsWith("text/plain") == true ->
150+
call.respondText(
151+
JibriMetricsContainer.getPrometheusMetrics(TextFormat.CONTENT_TYPE_004),
152+
contentType = ContentType.parse(TextFormat.CONTENT_TYPE_004)
153+
)
154+
155+
else ->
156+
call.respondText(
157+
JibriMetricsContainer.jsonString,
158+
contentType = ContentType.parse("application/json")
159+
)
160+
}
161+
}
162+
}
133163
}
134164
}
135165

src/main/kotlin/org/jitsi/jibri/api/xmpp/XmppApi.kt

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,6 @@ import org.jitsi.jibri.service.impl.SipGatewayServiceParams
3232
import org.jitsi.jibri.service.impl.StreamingParams
3333
import org.jitsi.jibri.service.impl.YOUTUBE_URL
3434
import org.jitsi.jibri.sipgateway.SipClientParams
35-
import org.jitsi.jibri.statsd.JibriStatsDClient
36-
import org.jitsi.jibri.statsd.STOPPED_ON_XMPP_CLOSED
37-
import org.jitsi.jibri.statsd.XMPP_CLOSED
38-
import org.jitsi.jibri.statsd.XMPP_CLOSED_ON_ERROR
39-
import org.jitsi.jibri.statsd.XMPP_CONNECTED
40-
import org.jitsi.jibri.statsd.XMPP_PING_FAILED
41-
import org.jitsi.jibri.statsd.XMPP_RECONNECTING
42-
import org.jitsi.jibri.statsd.XMPP_RECONNECTION_FAILED
4335
import org.jitsi.jibri.status.ComponentState
4436
import org.jitsi.jibri.status.JibriStatus
4537
import org.jitsi.jibri.status.JibriStatusManager
@@ -78,22 +70,21 @@ class XmppApi(
7870
private val jibriManager: JibriManager,
7971
private val xmppConfigs: List<XmppEnvironmentConfig>,
8072
private val jibriStatusManager: JibriStatusManager,
81-
private val statsDClient: JibriStatsDClient? = null
8273
) : IQListener {
8374
private val logger = createLogger()
8475

8576
private val connectionStateListener = object : ConnectionStateListener {
8677
override fun connected(mucClient: MucClient) {
87-
statsDClient?.incrementCounter(XMPP_CONNECTED, mucClient.tags())
78+
jibriManager.jibriMetrics.xmppConnected(mucClient.tags())
8879
}
8980
override fun reconnecting(mucClient: MucClient) {
90-
statsDClient?.incrementCounter(XMPP_RECONNECTING, mucClient.tags())
81+
jibriManager.jibriMetrics.xmppReconnecting(mucClient.tags())
9182
}
9283
override fun reconnectionFailed(mucClient: MucClient) {
93-
statsDClient?.incrementCounter(XMPP_RECONNECTION_FAILED, mucClient.tags())
84+
jibriManager.jibriMetrics.xmppReconnectionFailed(mucClient.tags())
9485
}
9586
override fun pingFailed(mucClient: MucClient) {
96-
statsDClient?.incrementCounter(XMPP_PING_FAILED, mucClient.tags())
87+
jibriManager.jibriMetrics.xmppPingFailed(mucClient.tags())
9788
}
9889

9990
/**
@@ -102,7 +93,7 @@ class XmppApi(
10293
* recording is stopped.
10394
*/
10495
override fun closed(mucClient: MucClient) {
105-
statsDClient?.incrementCounter(XMPP_CLOSED, mucClient.tags())
96+
jibriManager.jibriMetrics.xmppClosed(mucClient.tags())
10697
maybeStop(mucClient)
10798
}
10899

@@ -112,7 +103,7 @@ class XmppApi(
112103
* recording is stopped.
113104
*/
114105
override fun closedOnError(mucClient: MucClient) {
115-
statsDClient?.incrementCounter(XMPP_CLOSED_ON_ERROR, mucClient.tags())
106+
jibriManager.jibriMetrics.xmppClosedOnError(mucClient.tags())
116107
maybeStop(mucClient)
117108
}
118109

@@ -121,7 +112,7 @@ class XmppApi(
121112
val environmentContext = createEnvironmentContext(xmppEnvironment, mucClient)
122113
if (jibriManager.currentEnvironmentContext == environmentContext) {
123114
logger.warn("XMPP disconnected, stopping.")
124-
statsDClient?.incrementCounter(STOPPED_ON_XMPP_CLOSED, mucClient.tags())
115+
jibriManager.jibriMetrics.stoppedOnXmppClosed(mucClient.tags())
125116
jibriManager.stopService()
126117
}
127118
}

0 commit comments

Comments
 (0)