2626#include <pwd.h>
2727#include <unistd.h>
2828#include <dirent.h>
29+ #include <sys/time.h>
30+ #include <sys/resource.h>
31+ #include <limits.h>
2932
3033#ifdef _POSIX_MEMLOCK
3134# include <sys/mman.h>
@@ -298,7 +301,7 @@ watchdog_populate_list(void)
298301 FILE * file ;
299302
300303 snprintf (entry_name , sizeof (entry_name ),
301- SYS_CLASS_WATCHDOG "/%s/dev" , entry -> d_name );
304+ SYS_CLASS_WATCHDOG "/%s/dev" , entry -> d_name );
302305 file = fopen (entry_name , "r" );
303306 if (file ) {
304307 int major , minor ;
@@ -667,7 +670,7 @@ static int get_realtime_budget(void)
667670{
668671 FILE * f ;
669672 char fname [PATH_MAX ];
670- int res = -1 , lnum = 0 ;
673+ int res = -1 , lnum = 0 , num ;
671674 char * cgroup = NULL , * namespecs = NULL ;
672675
673676 snprintf (fname , PATH_MAX , "/proc/%jd/cgroup" , (intmax_t )getpid ());
@@ -677,7 +680,8 @@ static int get_realtime_budget(void)
677680 (intmax_t )getpid ());
678681 goto exit_res ;
679682 }
680- while ( fscanf (f , "%d:%m[^:]:%m[^\n]" , & lnum , & namespecs , & cgroup ) != EOF ) {
683+ while ( (num = fscanf (f , "%d:%m[^:]:%m[^\n]\n" , & lnum ,
684+ & namespecs , & cgroup )) != EOF ) {
681685 if (namespecs && strstr (namespecs , "cpuacct" )) {
682686 free (namespecs );
683687 break ;
@@ -690,6 +694,11 @@ static int get_realtime_budget(void)
690694 free (namespecs );
691695 namespecs = NULL ;
692696 }
697+ /* skip the rest of the line so we don't get stuck if the format changes */
698+ if ((num < 3 ) && ((fscanf (f , "%*[^\n]" ) == EOF ) ||
699+ (fscanf (f , "\n" ) == EOF ))) {
700+ break ;
701+ }
693702 }
694703 fclose (f );
695704 if (cgroup == NULL ) {
@@ -720,9 +729,15 @@ static int get_realtime_budget(void)
720729}
721730
722731/* stolen from corosync */
732+
733+ #define LEGACY_CGROUP_PROC_PIDS "/sys/fs/cgroup/cpu/tasks"
734+ #define UNIFIED_CGROUP_PROC_PIDS "/sys/fs/cgroup/cgroup.procs"
735+
723736static int sbd_move_to_root_cgroup (bool enforce_root_cgroup ) {
724737 FILE * f ;
725- int res = -1 ;
738+ int res = -1 , num ;
739+ char * rt_rq_name = NULL ;
740+ const char * root_pids = LEGACY_CGROUP_PROC_PIDS ;
726741
727742 /*
728743 * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
@@ -731,13 +746,53 @@ static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
731746 * This feature is expected to be removed as soon as systemd gets support
732747 * for managing RT configuration.
733748 */
734- f = fopen ("/sys/fs/cgroup/cpu/cpu.rt_runtime_us" , "rt" );
735- if (f == NULL ) {
736- cl_log (LOG_DEBUG , "cpu.rt_runtime_us doesn't exist -> "
749+ do {
750+ f = fopen ("/sys/fs/cgroup/cpu/cpu.rt_runtime_us" , "rt" );
751+ if (f ) {
752+ break ;
753+ }
754+ /* CONFIG_RT_GROUP_SCHED might still be enabled with cgroup-v2.
755+ The presence of cgroup.procs at the cgroup top level tells us that
756+ we have cgroup-v2 (handy, as we already need that to be in
757+ selinux-policy), and the name of the rt_rq(s) in /proc/sched_debug
758+ tells us that CONFIG_RT_GROUP_SCHED is enabled.
759+ cgroup-v2 has been around in the kernel for a while and it is
760+ not a mutually exclusive compile-time configuration - so we
761+ check what is actually mounted and go with what is there.
762+ */
763+ f = fopen (UNIFIED_CGROUP_PROC_PIDS , "rt" );
764+ if (f ) {
765+ fclose (f );
766+ f = fopen ("/proc/sched_debug" , "rt" );
767+ if (f ) {
768+ while (((num = fscanf (f , "rt_rq[%*[^]]]:%m[^\n]\n" ,
769+ & rt_rq_name )) != EOF ) &&
770+ (rt_rq_name == NULL )) {
771+ /* consume a line */
772+ if ((num > 0 ) || (fscanf (f , "%*[^\n]" ) == EOF ) ||
773+ (fscanf (f , "\n" ) == EOF )) {
774+ break ;
775+ }
776+ }
777+ /* no hierarchical rt-budget distribution with
778+ cgroup-v2 so far - thus checking for budget is
779+ useless
780+ */
781+ if (rt_rq_name ) {
782+ free (rt_rq_name );
783+ enforce_root_cgroup = true;
784+ root_pids = UNIFIED_CGROUP_PROC_PIDS ;
785+ break ;
786+ }
787+ fclose (f );
788+ }
789+ }
790+ cl_log (LOG_DEBUG , "cpu.rt_runtime_us doesn't exist & "
791+ "/proc/sched_debug doesn't contain rt_rq[...]:/ -> "
737792 "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED" );
738793 res = 0 ;
739794 goto exit_res ;
740- }
795+ } while ( 0 );
741796 fclose (f );
742797
743798 if ((!enforce_root_cgroup ) && (get_realtime_budget () > 0 )) {
@@ -747,21 +802,23 @@ static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
747802 goto exit_res ;
748803 }
749804
750- f = fopen ("/sys/fs/cgroup/cpu/tasks" , "w" );
805+ f = fopen (root_pids , "w" );
751806 if (f == NULL ) {
752- cl_log (LOG_WARNING , "Can't open cgroups tasks file for writing" );
807+ cl_log (LOG_WARNING , "Can't open %s for writing" , root_pids );
753808
754809 goto exit_res ;
755810 }
756811
757812 if (fprintf (f , "%jd\n" , (intmax_t )getpid ()) <= 0 ) {
758- cl_log (LOG_WARNING , "Can't write sbd pid into cgroups tasks file" );
813+ cl_log (LOG_WARNING , "Can't write sbd pid into %s" , root_pids );
759814 goto close_and_exit_res ;
760815 }
761816
817+ res = 0 ;
818+
762819close_and_exit_res :
763820 if (fclose (f ) != 0 ) {
764- cl_log (LOG_WARNING , "Can't close cgroups tasks file" );
821+ cl_log (LOG_WARNING , "Can't close %s" , root_pids );
765822 goto exit_res ;
766823 }
767824
@@ -776,15 +833,17 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
776833 return ;
777834 }
778835
836+ do {
779837#ifdef SCHED_RR
780838 if (move_to_root_cgroup ) {
781839 sbd_move_to_root_cgroup (enforce_moving_to_root_cgroup );
782840 }
783841
784842 {
785- int pcurrent = 0 ;
786843 int pmin = sched_get_priority_min (SCHED_RR );
787844 int pmax = sched_get_priority_max (SCHED_RR );
845+ struct sched_param sp ;
846+ int pcurrent ;
788847
789848 if (priority == 0 ) {
790849 priority = pmax ;
@@ -794,26 +853,47 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
794853 priority = pmax ;
795854 }
796855
797- pcurrent = sched_getscheduler (0 );
798- if (pcurrent < 0 ) {
856+ if (sched_getparam (0 , & sp ) < 0 ) {
799857 cl_perror ("Unable to get scheduler priority" );
800858
801- } else if ( pcurrent < priority ) {
802- struct sched_param sp ;
859+ } else if (( pcurrent = sched_getscheduler ( 0 )) < 0 ) {
860+ cl_perror ( "Unable to get scheduler policy" ) ;
803861
862+ } else if ((pcurrent == SCHED_RR ) &&
863+ (sp .sched_priority >= priority )) {
864+ cl_log (LOG_INFO ,
865+ "Stay with priority (%d) for policy SCHED_RR" ,
866+ sp .sched_priority );
867+ break ;
868+ } else {
804869 memset (& sp , 0 , sizeof (sp ));
805870 sp .sched_priority = priority ;
806871
807872 if (sched_setscheduler (0 , SCHED_RR , & sp ) < 0 ) {
808- cl_perror ("Unable to set scheduler priority to %d" , priority );
873+ cl_perror (
874+ "Unable to set scheduler policy to SCHED_RR priority %d" ,
875+ priority );
809876 } else {
810- cl_log (LOG_INFO , "Scheduler priority is now %d" , priority );
877+ cl_log (LOG_INFO ,
878+ "Scheduler policy is now SCHED_RR priority %d" ,
879+ priority );
880+ break ;
811881 }
812882 }
813883 }
814884#else
815- cl_log (LOG_ERR , "System does not support updating the scheduler priority" );
885+ cl_log (LOG_ERR , "System does not support updating the scheduler policy" );
886+ #endif
887+ #ifdef PRIO_PGRP
888+ if (setpriority (PRIO_PGRP , 0 , INT_MIN ) < 0 ) {
889+ cl_perror ("Unable to raise the scheduler priority" );
890+ } else {
891+ cl_log (LOG_INFO , "Scheduler priority raised to the maximum" );
892+ }
893+ #else
894+ cl_perror ("System does not support setting the scheduler priority" );
816895#endif
896+ } while (0 );
817897
818898 sbd_memlock (heapgrowK , stackgrowK );
819899}
@@ -826,7 +906,7 @@ maximize_priority(void)
826906 return ;
827907 }
828908
829- sbd_make_realtime (0 , 256 , 256 );
909+ sbd_make_realtime (0 , 256 , 256 );
830910
831911 if (ioprio_set (IOPRIO_WHO_PROCESS , getpid (),
832912 IOPRIO_PRIO_VALUE (IOPRIO_CLASS_RT , 1 )) != 0 ) {
0 commit comments