Skip to content

Commit 2ec37a2

Browse files
committed
Fix: scheduling: recognize and try to handle cgroup-v2 similarly
1 parent 2a6fd74 commit 2ec37a2

File tree

2 files changed

+71
-9
lines changed

2 files changed

+71
-9
lines changed

src/sbd-common.c

+57-9
Original file line numberDiff line numberDiff line change
@@ -729,9 +729,15 @@ static int get_realtime_budget(void)
729729
}
730730

731731
/* stolen from corosync */
732+
733+
/* Root-cgroup pid file of the legacy cpu-controller hierarchy; default
 * target that sbd_move_to_root_cgroup() opens to write its own pid into.
 */
#define LEGACY_CGROUP_PROC_PIDS "/sys/fs/cgroup/cpu/tasks"
734+
/* Top-level cgroup.procs file; sbd_move_to_root_cgroup() probes it to
 * detect cgroup-v2 and writes its pid there once an rt_rq[...] entry
 * has been found in /proc/sched_debug.
 */
#define UNIFIED_CGROUP_PROC_PIDS "/sys/fs/cgroup/cgroup.procs"
735+
732736
static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
733737
FILE *f;
734-
int res = -1;
738+
int res = -1, num;
739+
char *rt_rq_name = NULL;
740+
const char *root_pids = LEGACY_CGROUP_PROC_PIDS;
735741

736742
/*
737743
* /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
@@ -740,13 +746,53 @@ static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
740746
* This feature is expected to be removed as soon as systemd gets support
741747
* for managing RT configuration.
742748
*/
743-
f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
744-
if (f == NULL) {
745-
cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> "
749+
do {
750+
f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
751+
if (f) {
752+
break;
753+
}
754+
/* CONFIG_RT_GROUP_SCHED might still be enabled with cgroup-v2
755+
cgroup.procs on cgroup-toplevel tells us we have cgroup-v2
756+
(handy as we already need that to be in selinux-policy)
757+
and name of rt_rq(s) in /proc/sched_debug tells us that
758+
CONFIG_RT_GROUP_SCHED is enabled
759+
cgroup-v2 has been around for a while in the kernel and it
760+
is not a mutually exclusive compile-time-configuration - so
761+
checking what is actually mounted to go with what is there
762+
*/
763+
f = fopen(UNIFIED_CGROUP_PROC_PIDS, "rt");
764+
if (f) {
765+
fclose(f);
766+
f = fopen("/proc/sched_debug", "rt");
767+
if (f) {
768+
while (((num = fscanf(f, "rt_rq[%*[^]]]:%m[^\n]\n",
769+
&rt_rq_name)) != EOF) &&
770+
(rt_rq_name == NULL)) {
771+
/* consume a line */
772+
if ((num > 0) || (fscanf(f, "%*[^\n]") == EOF) ||
773+
(fscanf(f, "\n") == EOF)) {
774+
break;
775+
}
776+
}
777+
/* no hierarchical rt-budget distribution with
778+
cgroup-v2 so far - thus checking for budget is
779+
useless
780+
*/
781+
if (rt_rq_name) {
782+
free(rt_rq_name);
783+
enforce_root_cgroup = true;
784+
root_pids = UNIFIED_CGROUP_PROC_PIDS;
785+
break;
786+
}
787+
fclose(f);
788+
}
789+
}
790+
cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist & "
791+
"/proc/sched_debug doesn't contain rt_rq[...]:/ -> "
746792
"system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
747793
res = 0;
748794
goto exit_res;
749-
}
795+
} while (0);
750796
fclose(f);
751797

752798
if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) {
@@ -756,21 +802,23 @@ static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
756802
goto exit_res;
757803
}
758804

759-
f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
805+
f = fopen(root_pids, "w");
760806
if (f == NULL) {
761-
cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing");
807+
cl_log(LOG_WARNING, "Can't open %s for writing", root_pids);
762808

763809
goto exit_res;
764810
}
765811

766812
if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) {
767-
cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file");
813+
cl_log(LOG_WARNING, "Can't write sbd pid into %s", root_pids);
768814
goto close_and_exit_res;
769815
}
770816

817+
res = 0;
818+
771819
close_and_exit_res:
772820
if (fclose(f) != 0) {
773-
cl_log(LOG_WARNING, "Can't close cgroups tasks file");
821+
cl_log(LOG_WARNING, "Can't close %s", root_pids);
774822
goto exit_res;
775823
}
776824

src/sbd.sysconfig

+14
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,20 @@ SBD_TIMEOUT_ACTION=flush,reboot
104104
# If that is the case sbd will stay in that slice while it will
105105
# be moved to root-slice otherwise.
106106
#
107+
# With cgroup-v2 the behavior is very different.
108+
# With CONFIG_RT_GROUP_SCHED enabled and cpu-controller enabled
109+
# there currently is no way to configure RT-budget in any slice
110+
# but the root-slice. The other way round, if there is RT-budget used
111+
# in any but the root-slice enabling the cpu-controller is
112+
# inhibited.
113+
# Thus - unless explicitly disabled by setting 'no' - with cgroup-v2
114+
# and CONFIG_RT_GROUP_SCHED enabled sbd is always moved
115+
# to the root-slice regardless of whether the cpu-controller is at the
116+
# moment enabled or not.
117+
# Reason is that subsequent services might enable the cpu-controller
118+
# or fail doing so if sbd was already using RT-budget in e.g. the
119+
# system-slice.
120+
#
107121
SBD_MOVE_TO_ROOT_CGROUP=auto
108122

109123
## Type: string

0 commit comments

Comments
 (0)