Skip to content

Commit bf3178d

Browse files
committed
prometheus-node-exporter-ucode: new modules & info
-thermal management (temp and cooling devices) -uname amendment to oneline and help string -realtek-poe -odhcp6c -entropy help strings -time help string -hwmon -time clocksources -watchdog -metrics help strings Signed-off-by: Paul Donald <[email protected]>
1 parent 7b3219a commit bf3178d

File tree

10 files changed

+347
-9
lines changed

10 files changed

+347
-9
lines changed

utils/prometheus-node-exporter-ucode/Makefile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
include $(TOPDIR)/rules.mk
44

55
PKG_NAME:=prometheus-node-exporter-ucode
6-
PKG_VERSION:=2024.02.07
7-
PKG_RELEASE:=2
6+
PKG_VERSION:=2025.12.04
7+
PKG_RELEASE:=1
88

99
PKG_MAINTAINER:=Andre Heider <[email protected]>
1010
PKG_LICENSE:=Apache-2.0
@@ -65,10 +65,14 @@ define Collector
6565
endef
6666

6767
$(eval $(call Collector,dnsmasq,Dnsmasq collector,@dnsmasq))
68+
$(eval $(call Collector,hwmon,hwmon collector,))
6869
$(eval $(call Collector,ltq-dsl,Lantiq/Intel/MaxLinear DSL collector,@ltq-dsl-app))
6970
$(eval $(call Collector,netstat,netstat collector,))
71+
$(eval $(call Collector,odhcp6c,odhcp6c statistics collector,@odhcp6c))
7072
$(eval $(call Collector,openwrt,OpenWrt collector,))
73+
$(eval $(call Collector,realtek-poe,RealTek PoE collector,@realtek-poe))
7174
$(eval $(call Collector,snmp6,snmp6 collector,))
75+
$(eval $(call Collector,thermal,thermal collector,))
7276
$(eval $(call Collector,uci_dhcp_host,UCI DHCP host collector,))
7377
$(eval $(call Collector,wifi,Wi-Fi collector,+ucode-mod-nl80211))
7478
$(eval $(call Collector,wireguard,Wireguard collector,+rpcd-mod-wireguard))
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
gauge("node_entropy_available_bits")
1+
gauge("node_entropy_available_bits", "Bits of available entropy.")
22
(null, oneline("/proc/sys/kernel/random/entropy_avail"));
3-
gauge("node_entropy_pool_size_bits")
3+
gauge("node_entropy_pool_size_bits", "Bits of entropy pool.")
44
(null, oneline("/proc/sys/kernel/random/poolsize"));
Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,34 @@
1-
gauge("node_time_seconds")(null, time());
1+
// Time collector: exports the value of time() plus the available and current
// kernel clocksources found in sysfs.
gauge("node_time_seconds", "System time in seconds since epoch (1970).")(null, time());

// based loosely on https://github.com/prometheus/node_exporter/blob/master/collector/time.go

// No trailing slash here: path components are joined with an explicit "/" below,
// which avoids building paths that contain "//".
const SDS_CLOCK_PATH = "/sys/devices/system/clocksource";

const avail_gauge = gauge("node_time_clocksource_available_info",
	"Available clocksources read from '/sys/devices/system/clocksource'.");
const current_gauge = gauge("node_time_clocksource_current_info",
	"Current clocksource read from '/sys/devices/system/clocksource'.");

// Label sets for the "current" metric. They are collected first and emitted
// after the loop so that the samples of each metric are printed together.
const current_sources = [];

for (let clock_src_path in fs.lsdir(SDS_CLOCK_PATH, "clocksource*")) {
	// The numeric suffix of the directory name becomes the "device" label.
	const csp_match = match(clock_src_path, /clocksource(\d+)/);
	if (!csp_match)
		continue;

	const device = csp_match[1];
	const base = `${SDS_CLOCK_PATH}/${clock_src_path}`;
	const available = oneline(`${base}/available_clocksource`);
	const current = oneline(`${base}/current_clocksource`);

	// available_clocksource is a space separated list; skip empty fields
	// produced by leading/trailing/double spaces.
	if (available) {
		for (let source in split(available, ' ')) {
			if (!source)
				continue;

			avail_gauge({ clocksource: source, device }, 1);
		}
	}

	// Do not export a sample whose clocksource label could not be read.
	if (current)
		push(current_sources, { clocksource: current, device });
}

for (let cs in current_sources)
	current_gauge(cs, 1);
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
gauge("node_uname_info")({
1+
gauge("node_uname_info", "Labelled system information as provided by the uname system call.")({
22
sysname: oneline("/proc/sys/kernel/ostype"),
33
nodename: oneline("/proc/sys/kernel/hostname"),
44
release: oneline("/proc/sys/kernel/osrelease"),
55
version: oneline("/proc/sys/kernel/version"),
6-
machine: poneline("uname -m"), // TODO lame
6+
machine: oneline("/proc/sys/kernel/arch"),
77
domainname: oneline("/proc/sys/kernel/domainname"),
88
}, 1);
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Watchdog collector: exports the attributes of every /sys/class/watchdog/watchdog*
// device.
// based loosely on https://github.com/prometheus/node_exporter/blob/master/collector/watchdog.go

// No trailing slash: components are joined with an explicit "/" below.
const SC_WATCHDOG_PATH = "/sys/class/watchdog";

// Maps a sysfs attribute file name to the gauge that exports its content.
const metrics = {
	bootstatus: gauge("node_watchdog_bootstatus",
		"Value of /sys/class/watchdog/<watchdog>/bootstatus"),
	fw_version: gauge("node_watchdog_fw_version",
		"Value of /sys/class/watchdog/<watchdog>/fw_version"),
	nowayout: gauge("node_watchdog_nowayout",
		"Value of /sys/class/watchdog/<watchdog>/nowayout"),
	timeleft: gauge("node_watchdog_timeleft_seconds",
		"Value of /sys/class/watchdog/<watchdog>/timeleft"),
	timeout: gauge("node_watchdog_timeout_seconds",
		"Value of /sys/class/watchdog/<watchdog>/timeout"),
	pretimeout: gauge("node_watchdog_pretimeout_seconds",
		"Value of /sys/class/watchdog/<watchdog>/pretimeout"),
	access_cs0: gauge("node_watchdog_access_cs0",
		"Value of /sys/class/watchdog/<watchdog>/access_cs0"),
};

const info_gauge = gauge("node_watchdog_info",
	"Info of /sys/class/watchdog/<watchdog>");
const avail_gauge = gauge("node_watchdog_available",
	"Info of /sys/class/watchdog/<watchdog>/pretimeout_available_governors");

// Directory names of all watchdog devices; empty when the class directory
// cannot be listed.
const wd_paths = fs.lsdir(SC_WATCHDOG_PATH, "watchdog*") || [];

// watchdog metrics
// Iterating attribute-first keeps all samples of one metric together.
for (let m in metrics) {
	for (let wd_path in wd_paths) {
		const value = oneline(`${SC_WATCHDOG_PATH}/${wd_path}/${m}`);

		// Not every attribute file is readable on every watchdog; export a
		// sample only when a value was actually read.
		if (!length(value))
			continue;

		metrics[m]({ name: wd_path }, value);
	}
}

// watchdog summary info properties
// Attributes that cannot be read are exported as an empty label value
// rather than as a stringified null.
for (let wd_path in wd_paths) {
	const path = `${SC_WATCHDOG_PATH}/${wd_path}`;

	info_gauge({
		name: wd_path,
		options: oneline(`${path}/options`) || "",
		identity: oneline(`${path}/identity`) || "",
		state: oneline(`${path}/state`) || "",
		status: oneline(`${path}/status`) || "",
		pretimeout_governor: oneline(`${path}/pretimeout_governor`) || "",
	}, 1);
}

// one sample per entry of the space separated governor list
for (let wd_path in wd_paths) {
	const governors = oneline(`${SC_WATCHDOG_PATH}/${wd_path}/pretimeout_available_governors`);

	for (let gov in split(governors || "", ' ')) {
		if (!gov)
			continue;

		avail_gauge({ available: gov, device: wd_path }, 1);
	}
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
2+
// hwmon collector: exports chip names, sensor labels, temperatures and PWM
// values found below /sys/class/hwmon.

const metric_chip_names = gauge("node_hwmon_chip_names", "Annotation metric for human-readable chip names");
const metric_sensor_label = gauge("node_hwmon_sensor_label", "Label for given chip and sensor");
const metric_temp_celsius = gauge("node_hwmon_temp_celsius", "Hardware monitor for temperature");
const metric_pwm = gauge("node_hwmon_pwm", "Pulse Width Modulation control");

const SC_HWMON_PATH = "/sys/class/hwmon/";

// One entry per hwmon device: { path: <sysfs directory>, chip: <chip label> }.
const hwmons = [];

for (let hwmon_path in fs.lsdir(SC_HWMON_PATH, "hwmon*")) {
	const full_path = `${SC_HWMON_PATH}${hwmon_path}`;

	// Produce node_hwmon_chip_names
	// See https://github.com/prometheus/node_exporter/blob/7c564bcbeffade3dacac43b07c2eeca4957ca71d/collector/hwmon_linux.go#L415
	// Falls back to the directory name when the "name" file cannot be read.
	const chip_name = oneline(`${full_path}/name`) || hwmon_path;

	// See https://github.com/prometheus/node_exporter/blob/7c564bcbeffade3dacac43b07c2eeca4957ca71d/collector/hwmon_linux.go#L355
	// When the "device" link resolves, the chip label is built from the last
	// two components of the resolved path; otherwise the chip name is used.
	let chip = chip_name;
	const real_dev_path = fs.realpath(`${full_path}/device`);

	if (real_dev_path) {
		const dev_name = fs.basename(real_dev_path);
		const dev_type = fs.basename(fs.dirname(real_dev_path));

		chip = `${dev_type}_${dev_name}`;
	}

	push(hwmons, { path: full_path, chip });

	metric_chip_names({ chip, chip_name }, 1);
}

// The sensor files are walked once per metric so that all samples of one
// metric are emitted together.

// Produce node_hwmon_sensor_label
for (let hw in hwmons) {
	for (let sensor_path in fs.lsdir(hw.path, "*_label")) {
		// Capture the sensor name in front of the "_label" suffix. (rtrim()
		// strips a set of characters, not a suffix, so it is not used here.)
		const m = match(sensor_path, /^(.+)_label$/);
		if (!m)
			continue;

		const sensor_label = oneline(`${hw.path}/${sensor_path}`);
		if (!length(sensor_label))
			continue;

		metric_sensor_label({ chip: hw.chip, sensor: m[1], label: sensor_label }, 1);
	}
}

// Produce node_hwmon_temp_celsius
for (let hw in hwmons) {
	for (let sensor_path in fs.lsdir(hw.path, "temp*_input")) {
		const m = match(sensor_path, /^(temp[0-9]+)_input$/);
		if (!m)
			continue;

		// Skip sensors that cannot be read instead of reporting a bogus
		// temperature derived from a missing value.
		const raw = oneline(`${hw.path}/${sensor_path}`);
		if (!length(raw))
			continue;

		// the raw value is divided by 1000 to obtain the exported Celsius value
		metric_temp_celsius({ chip: hw.chip, sensor: m[1] }, raw / 1000.00);
	}
}

// Produce node_hwmon_pwm
for (let hw in hwmons) {
	for (let sensor_path in fs.lsdir(hw.path, "pwm*")) {
		// only the plain pwmN files, not pwmN_enable, pwmN_mode, ...
		if (!match(sensor_path, /^pwm[0-9]+$/))
			continue;

		const pwm = oneline(`${hw.path}/${sensor_path}`);
		if (!length(pwm))
			continue;

		metric_pwm({ chip: hw.chip, sensor: sensor_path }, pwm);
	}
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// odhcp6c collector: exports the per-interface statistics returned by the
// "get_statistics" method of the odhcp6c.<device> ubus objects.
const devs = ubus.call("network.device", "status");

if (!devs)
	return false;

// statistic name -> list of { dev, value } samples.
// Samples are collected first so that each metric gets exactly one gauge
// (and therefore one declaration) and all of its samples are emitted as one
// group, even when odhcp6c runs on several interfaces.
const stats = {};

for (let dev in devs) {
	const m = ubus.call(`odhcp6c.${dev}`, "get_statistics");

	// not all interfaces are exposed unless odhcp6c runs on it
	if (!m)
		continue;

	for (let i in m) {
		if (!stats[i])
			stats[i] = [];

		push(stats[i], { dev, value: m[i] });
	}
}

for (let i in stats) {
	const stat_gauge = gauge(`node_odhcp6c_${i}`, `Total DHCPv6 messages of type ${i}`);

	for (let sample in stats[i])
		stat_gauge({ dev: sample.dev }, sample.value);
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// realtek-poe collector: exports the switch-wide and per-port data returned
// by the "info" method of the "poe" ubus object.
const poe_info = ubus.call("poe", "info");

if (!poe_info)
	return false;

// possible poe modes for a port
// realtek-poe/src/main.c
// static int poe_reply_port_ext_config()
const POE_MODES = [
	"PoE",
	"Legacy",
	"pre-PoE+",
	"PoE+",
];

// possible poe states for a port
// realtek-poe/src/main.c
// static int poe_reply_4_port_status()
const POE_STATES = [
	"Disabled",
	"Searching",
	"Delivering power",
	"Fault",
	"Other fault",
	"Requesting power",
];

// helper vars
const mcu = poe_info["mcu"];
const ports = poe_info["ports"];
const budget = poe_info["budget"];
const firmware = poe_info["firmware"];
const consumption = poe_info["consumption"];

// push info, budget and consumption metric
gauge("realtek_poe_switch_info",
	"PoE controller information (mcu and firmware labels).")({ mcu, firmware }, 1);
gauge("realtek_poe_switch_budget_watts",
	"Power budget of the PoE switch in watts.")(null, budget);
gauge("realtek_poe_switch_consumption_watts",
	"Power consumption of the PoE switch in watts.")(null, consumption);

// Each per-port metric below gets its own pass over the ports so that the
// samples of one metric stay together. Loop variables are declared with
// `let` so they do not become implicit globals.

// push per port priority metrics
const priority_metric = gauge("realtek_poe_port_priority",
	"Priority of the PoE port.");
for (let port, values in ports)
	priority_metric({ device: port }, values["priority"]);

// push per port consumption metrics
// a port without a "consumption" field is reported as 0
const consumption_metric = gauge("realtek_poe_port_consumption_watts",
	"Power consumption of the PoE port in watts.");
for (let port, values in ports)
	consumption_metric({ device: port }, (values["consumption"] || 0));

// push per port state metrics
// one sample per (port, known state): 1 for the state the port reports, else 0
const state_metric = gauge("realtek_poe_port_state",
	"State of the PoE port (1 for the reported state, 0 otherwise).");
for (let state in POE_STATES) {
	for (let port, values in ports)
		state_metric({ device: port, state }, (values["status"] == state) ? 1 : 0);
}

// push per port mode metrics
// one sample per (port, known mode): 1 for the mode the port reports, else 0
const mode_metric = gauge("realtek_poe_port_mode",
	"Mode of the PoE port (1 for the reported mode, 0 otherwise).");
for (let mode in POE_MODES) {
	for (let port, values in ports)
		mode_metric({ device: port, mode }, (values["mode"] == mode) ? 1 : 0);
}
71+
72+
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
2+
// modelled after: https://github.com/prometheus/node_exporter/blob/master/collector/thermal_zone_linux.go
// See also: https://docs.kernel.org/driver-api/thermal/sysfs-api.html

// thermal collector
const thermal_devs = [];

for (let idx = 0; ; idx++) {
	const devPath = `/sys/class/thermal/thermal_zone${idx}`;

	// The first zone index without a readable "type" ends the enumeration
	// (assumes zone numbering has no gaps -- TODO confirm).
	const typ = oneline(`${devPath}/type`);
	if (!typ)
		break;

	// A zone whose policy or temperature cannot be read is skipped; it must
	// not end the enumeration, otherwise all later zones would be lost too.
	const policy = oneline(`${devPath}/policy`);
	const temp = oneline(`${devPath}/temp`);
	if (!policy || !temp)
		continue;

	push(thermal_devs, {
		idx,
		typ,
		policy,
		temp,
		mode: oneline(`${devPath}/mode`),
		passive: oneline(`${devPath}/passive`),
	});
}

if (length(thermal_devs) > 0) {
	const temp_metric = gauge("node_thermal_zone_temp", "Zone temperature in Celsius");

	for (let d in thermal_devs) {
		const labels = { zone: `${d.idx}`, type: d.typ, policy: d.policy };

		// the "passive" label is only attached when the attribute was read
		if (d.passive)
			labels.passive = d.passive;

		// the raw sysfs value is divided by 1000 to obtain the exported value
		temp_metric(labels, d.temp / 1000.00);
	}
}

// cooling collector
const cooling_devs = [];

for (let idx = 0; ; idx++) {
	const devPath = `/sys/class/thermal/cooling_device${idx}`;

	// same termination rule as for the thermal zones above
	const typ = oneline(`${devPath}/type`);
	if (!typ)
		break;

	push(cooling_devs, {
		idx,
		typ,
		cur: oneline(`${devPath}/cur_state`),
		max: oneline(`${devPath}/max_state`),
	});
}

if (length(cooling_devs) > 0) {
	// Two passes, one per metric, so that the samples of each metric are
	// emitted together. Unreadable state files produce no sample.
	const cur_throttle = gauge("node_cooling_device_cur_state", "Current throttle state of the cooling device");

	for (let d in cooling_devs) {
		if (!length(d.cur))
			continue;

		cur_throttle({ name: `${d.idx}`, type: d.typ }, d.cur);
	}

	const max_throttle = gauge("node_cooling_device_max_state", "Maximum throttle state of the cooling device");

	for (let d in cooling_devs) {
		if (!length(d.max))
			continue;

		max_throttle({ name: `${d.idx}`, type: d.typ }, d.max);
	}
}

utils/prometheus-node-exporter-ucode/files/metrics.uc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,10 @@ global.handle_request = function(env) {
156156
157157
httpstatus("200 OK");
158158
159-
let duration = gauge("node_scrape_collector_duration_seconds");
160-
let success = gauge("node_scrape_collector_success");
159+
let duration = gauge("node_scrape_collector_duration_seconds",
160+
"node_exporter: Duration of a collector scrape.");
161+
let success = gauge("node_scrape_collector_success",
162+
"node_exporter: Whether a collector succeeded.");
161163
162164
for (let col in cols) {
163165
let ok = false;

0 commit comments

Comments
 (0)