1
1
use std:: fs:: File ;
2
+ use std:: ops:: Deref ;
2
3
use std:: os:: fd:: AsFd ;
3
4
use std:: path:: Path ;
4
5
@@ -38,23 +39,17 @@ impl IdMap {
38
39
}
39
40
}
40
41
41
- pub struct MntNamespace {
42
- mnt_fd : File ,
42
+ pub struct UserNamespace {
43
43
uid_map : IdMap ,
44
44
gid_map : IdMap ,
45
45
}
46
46
47
- impl MntNamespace {
47
+ impl UserNamespace {
48
48
/// Read the UID and GID maps of a process's user namespace.
49
- pub fn of_pid ( pid : Pid ) -> Result < MntNamespace > {
50
- let mnt_fd = File :: open ( format ! ( "/proc/{}/ns/mnt" , pid. as_raw_nonzero( ) ) ) ?;
49
+ pub fn of_pid ( pid : Pid ) -> Result < Self > {
51
50
let uid_map = IdMap :: read ( format ! ( "/proc/{}/uid_map" , pid. as_raw_nonzero( ) ) . as_ref ( ) ) ?;
52
51
let gid_map = IdMap :: read ( format ! ( "/proc/{}/gid_map" , pid. as_raw_nonzero( ) ) . as_ref ( ) ) ?;
53
- Ok ( MntNamespace {
54
- mnt_fd,
55
- uid_map,
56
- gid_map,
57
- } )
52
+ Ok ( Self { uid_map, gid_map } )
58
53
}
59
54
60
55
/// Check if we're in a user namespace.
@@ -72,48 +67,85 @@ impl MntNamespace {
72
67
Ok ( self . gid_map . translate ( gid) . context ( "GID overflows" ) ?)
73
68
}
74
69
70
+ /// "Enter" the user namespace.
71
+ ///
72
+ /// This operation is not reversible.
73
+ ///
74
+ /// This does not actually enter the user namespace, but rather just switches to become the root
75
+ /// user inside the namespace.
76
+ ///
77
+ /// Entering the user namespace turns out to be problematic.
78
+ /// The reason seems to be this line [1]:
79
+ /// which means `CAP_MKNOD` capability of the *init* namespace is needed.
80
+ /// However task's associated security context is all relative to its current
81
+ /// user namespace [2], so once you enter a user namespace there's no way of getting
82
+ /// back `CAP_MKNOD` of the init namespace anymore.
83
+ /// (Yes this means that even if CAP_MKNOD is granted to the container, you cannot
84
+ /// create device nodes within it.)
85
+ ///
86
+ /// [1]: https://elixir.bootlin.com/linux/v6.11.1/source/fs/namei.c#L4073
87
+ /// [2]: https://elixir.bootlin.com/linux/v6.11.1/source/include/linux/cred.h#L111
88
+ pub fn enter ( & self ) -> Result < ( ) > {
89
+ // By default `setuid` will drop capabilities when transitioning from root
90
+ // to non-root user. This bit prevents it so our code still has its superpowers.
91
+ rustix:: thread:: set_capabilities_secure_bits ( CapabilitiesSecureBits :: NO_SETUID_FIXUP ) ?;
92
+
93
+ rustix:: thread:: set_thread_uid ( Uid :: from_raw ( self . uid ( 0 ) ?) ) ?;
94
+ rustix:: thread:: set_thread_gid ( Gid :: from_raw ( self . gid ( 0 ) ?) ) ?;
95
+ Ok ( ( ) )
96
+ }
97
+ }
98
+
99
+ pub struct MntNamespace {
100
+ mnt_fd : File ,
101
+ user_ns : UserNamespace ,
102
+ }
103
+
104
+ impl Deref for MntNamespace {
105
+ type Target = UserNamespace ;
106
+
107
+ fn deref ( & self ) -> & UserNamespace {
108
+ & self . user_ns
109
+ }
110
+ }
111
+
112
+ impl MntNamespace {
113
+ /// Open the mount namespace of a process.
114
+ pub fn of_pid ( pid : Pid ) -> Result < MntNamespace > {
115
+ let mnt_fd = File :: open ( format ! ( "/proc/{}/ns/mnt" , pid. as_raw_nonzero( ) ) ) ?;
116
+ let user_ns = UserNamespace :: of_pid ( pid) ?;
117
+ Ok ( MntNamespace { mnt_fd, user_ns } )
118
+ }
119
+
75
120
/// Enter the mount namespace.
76
- pub fn enter < T : Send , F : FnOnce ( ) -> T + Send > ( & self , f : F ) -> Result < T > {
121
+ ///
122
+ /// This operation is not reversible.
123
+ pub fn enter ( & self ) -> Result < ( ) > {
124
+ // Unshare FS for this specific thread so we can switch to another namespace.
125
+ // Not doing this will cause EINVAL when switching to namespaces.
126
+ rustix:: thread:: unshare ( UnshareFlags :: FS ) ?;
127
+
128
+ // Switch this particular thread to the container's mount namespace.
129
+ rustix:: thread:: move_into_link_name_space (
130
+ self . mnt_fd . as_fd ( ) ,
131
+ Some ( LinkNameSpaceType :: Mount ) ,
132
+ ) ?;
133
+
134
+ // If user namespace is used, we must act like the root user *inside*
135
+ // the namespace to be able to create files properly (otherwise EOVERFLOW
136
+ // will be returned when creating file).
137
+ self . user_ns . enter ( ) ?;
138
+ Ok ( ( ) )
139
+ }
140
+
141
+ /// Execute `f` inside the mount namespace, on a dedicated scoped thread.
142
+ pub fn with < T : Send , F : FnOnce ( ) -> T + Send > ( & self , f : F ) -> Result < T > {
77
143
// To avoid messing with rest of the process, we do everything in a new thread.
78
144
// Use scoped thread to avoid 'static bound (we need to access fd).
79
145
std:: thread:: scope ( |scope| {
80
146
scope
81
147
. spawn ( || -> Result < T > {
82
- // Unshare FS for this specific thread so we can switch to another namespace.
83
- // Not doing this will cause EINVAL when switching to namespaces.
84
- rustix:: thread:: unshare ( UnshareFlags :: FS ) ?;
85
-
86
- // Switch this particular thread to the container's mount namespace.
87
- rustix:: thread:: move_into_link_name_space (
88
- self . mnt_fd . as_fd ( ) ,
89
- Some ( LinkNameSpaceType :: Mount ) ,
90
- ) ?;
91
-
92
- // If user namespace is used, we must act like the root user *inside*
93
- // namespace to be able to create files properly (otherwise EOVERFLOW
94
- // will be returned when creating file).
95
- //
96
- // Entering the user namespace turns out to be problematic.
97
- // The reason seems to be this line [1]:
98
- // which means `CAP_MKNOD` capability of the *init* namespace is needed.
99
- // However task's associated security context is all relative to its current
100
- // user namespace [2], so once you enter a user namespace there's no way of getting
101
- // back `CAP_MKNOD` of the init namespace anymore.
102
- // (Yes this means that even if CAP_MKNOD is granted to the container, you cannot
103
- // create device nodes within it.)
104
- //
105
- // [1]: https://elixir.bootlin.com/linux/v6.11.1/source/fs/namei.c#L4073
106
- // [2]: https://elixir.bootlin.com/linux/v6.11.1/source/include/linux/cred.h#L111
107
-
108
- // By default `setuid` will drop capabilities when transitioning from root
109
- // to non-root user. This bit prevents it so our code still have superpower.
110
- rustix:: thread:: set_capabilities_secure_bits (
111
- CapabilitiesSecureBits :: NO_SETUID_FIXUP ,
112
- ) ?;
113
-
114
- rustix:: thread:: set_thread_uid ( Uid :: from_raw ( self . uid ( 0 ) ?) ) ?;
115
- rustix:: thread:: set_thread_gid ( Gid :: from_raw ( self . gid ( 0 ) ?) ) ?;
116
-
148
+ self . enter ( ) ?;
117
149
Ok ( f ( ) )
118
150
} )
119
151
. join ( )
0 commit comments