Skip to content

Commit 8c64a2f

Browse files
authored
Merge pull request containerd#10607 from fuweid/pin-userns
internal/cri: simplify netns setup with pinned userns
2 parents 67b0687 + ee0ed75 commit 8c64a2f

16 files changed

+685
-153
lines changed

internal/cri/opts/spec_opts.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,23 @@ func WithoutNamespace(t runtimespec.LinuxNamespaceType) oci.SpecOpts {
301301
}
302302
}
303303

304+
// WithNamespacePath updates namespace with existing path.
305+
func WithNamespacePath(t runtimespec.LinuxNamespaceType, nsPath string) oci.SpecOpts {
306+
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
307+
if s.Linux == nil {
308+
return fmt.Errorf("Linux spec is required")
309+
}
310+
311+
for i, ns := range s.Linux.Namespaces {
312+
if ns.Type == t {
313+
s.Linux.Namespaces[i].Path = nsPath
314+
return nil
315+
}
316+
}
317+
return fmt.Errorf("no such namespace %s", t)
318+
}
319+
}
320+
304321
// WithPodNamespaces sets the pod namespaces for the container
305322
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, sandboxPid uint32, targetPid uint32, uids, gids []runtimespec.LinuxIDMapping) oci.SpecOpts {
306323
namespaces := config.GetNamespaceOptions()

internal/cri/server/podsandbox/helpers_linux.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import (
4040
"github.com/containerd/containerd/v2/core/snapshots"
4141
"github.com/containerd/containerd/v2/internal/cri/seutil"
4242
"github.com/containerd/containerd/v2/pkg/seccomp"
43+
"github.com/containerd/containerd/v2/pkg/sys"
4344
)
4445

4546
const (
@@ -88,6 +89,50 @@ func (c *Controller) getSandboxDevShm(id string) string {
8889
return filepath.Join(c.getVolatileSandboxRootDir(id), "shm")
8990
}
9091

92+
// getSandboxPinnedNamespaces returns the pinned namespaces directory inside the
93+
// sandbox state directory.
94+
func (c *Controller) getSandboxPinnedNamespaces(id string) string {
95+
return filepath.Join(c.getVolatileSandboxRootDir(id), "pinned-namespaces")
96+
}
97+
98+
// getSandboxPinnedUserNamespace returns the pinned user namespace file.
99+
func (c *Controller) getSandboxPinnedUserNamespace(id string) string {
100+
return filepath.Join(c.getSandboxPinnedNamespaces(id), "user")
101+
}
102+
103+
// pinUserNamespace persists user namespace in namespace filesystem.
104+
func (c *Controller) pinUserNamespace(sandboxID string, netnsPath string) error {
105+
nsPath := c.getSandboxPinnedUserNamespace(sandboxID)
106+
107+
baseDir := filepath.Dir(nsPath)
108+
if err := os.MkdirAll(baseDir, 0755); err != nil {
109+
return fmt.Errorf("failed to init pinned-namespaces directory %s: %w", baseDir, err)
110+
}
111+
112+
emptyFd, err := os.OpenFile(nsPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
113+
if err != nil {
114+
return fmt.Errorf("failed to create empty file %s: %w", nsPath, err)
115+
}
116+
emptyFd.Close()
117+
118+
netnsFd, err := os.Open(netnsPath)
119+
if err != nil {
120+
return fmt.Errorf("failed to open netns(%s): %w", netnsPath, err)
121+
}
122+
defer netnsFd.Close()
123+
124+
usernsFd, err := sys.GetUsernsForNamespace(netnsFd.Fd())
125+
if err != nil {
126+
return fmt.Errorf("failed to get user namespace for netns(%s): %w", netnsPath, err)
127+
}
128+
defer usernsFd.Close()
129+
130+
if err = unix.Mount(usernsFd.Name(), nsPath, "none", unix.MS_BIND, ""); err != nil {
131+
return fmt.Errorf("failed to bind mount ns src: %v at %s: %w", usernsFd.Name(), nsPath, err)
132+
}
133+
return nil
134+
}
135+
91136
func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) {
92137
var labels []string
93138

internal/cri/server/podsandbox/sandbox_run.go

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,39 @@ func (c *Controller) Start(ctx context.Context, id string) (cin sandbox.Controll
9595

9696
labels["oci_runtime_type"] = ociRuntime.Type
9797

98+
// Create sandbox container root directories.
99+
sandboxRootDir := c.getSandboxRootDir(id)
100+
if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil {
101+
return cin, fmt.Errorf("failed to create sandbox root directory %q: %w",
102+
sandboxRootDir, err)
103+
}
104+
defer func() {
105+
if retErr != nil && cleanupErr == nil {
106+
// Cleanup the sandbox root directory.
107+
if cleanupErr = c.os.RemoveAll(sandboxRootDir); cleanupErr != nil {
108+
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove sandbox root directory %q",
109+
sandboxRootDir)
110+
}
111+
}
112+
}()
113+
114+
volatileSandboxRootDir := c.getVolatileSandboxRootDir(id)
115+
if err := c.os.MkdirAll(volatileSandboxRootDir, 0755); err != nil {
116+
return cin, fmt.Errorf("failed to create volatile sandbox root directory %q: %w",
117+
volatileSandboxRootDir, err)
118+
}
119+
defer func() {
120+
if retErr != nil && cleanupErr == nil {
121+
deferCtx, deferCancel := ctrdutil.DeferContext()
122+
defer deferCancel()
123+
// Cleanup the volatile sandbox root directory.
124+
if cleanupErr = ensureRemoveAll(deferCtx, volatileSandboxRootDir); cleanupErr != nil {
125+
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove volatile sandbox root directory %q",
126+
volatileSandboxRootDir)
127+
}
128+
}
129+
}()
130+
98131
// Create sandbox container.
99132
// NOTE: sandboxContainerSpec SHOULD NOT have side
100133
// effect, e.g. accessing/creating files, so that we can test
@@ -164,37 +197,6 @@ func (c *Controller) Start(ctx context.Context, id string) (cin sandbox.Controll
164197
}
165198
}()
166199

167-
// Create sandbox container root directories.
168-
sandboxRootDir := c.getSandboxRootDir(id)
169-
if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil {
170-
return cin, fmt.Errorf("failed to create sandbox root directory %q: %w",
171-
sandboxRootDir, err)
172-
}
173-
defer func() {
174-
if retErr != nil && cleanupErr == nil {
175-
// Cleanup the sandbox root directory.
176-
if cleanupErr = c.os.RemoveAll(sandboxRootDir); cleanupErr != nil {
177-
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove sandbox root directory %q",
178-
sandboxRootDir)
179-
}
180-
}
181-
}()
182-
183-
volatileSandboxRootDir := c.getVolatileSandboxRootDir(id)
184-
if err := c.os.MkdirAll(volatileSandboxRootDir, 0755); err != nil {
185-
return cin, fmt.Errorf("failed to create volatile sandbox root directory %q: %w",
186-
volatileSandboxRootDir, err)
187-
}
188-
defer func() {
189-
if retErr != nil && cleanupErr == nil {
190-
// Cleanup the volatile sandbox root directory.
191-
if cleanupErr = c.os.RemoveAll(volatileSandboxRootDir); cleanupErr != nil {
192-
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove volatile sandbox root directory %q",
193-
volatileSandboxRootDir)
194-
}
195-
}
196-
}()
197-
198200
// Setup files required for the sandbox.
199201
if err = c.setupSandboxFiles(id, config); err != nil {
200202
return cin, fmt.Errorf("failed to setup sandbox files: %w", err)

internal/cri/server/podsandbox/sandbox_run_linux.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ func (c *Controller) sandboxContainerSpec(id string, config *runtime.PodSandboxC
103103
case runtime.NamespaceMode_POD:
104104
specOpts = append(specOpts, oci.WithUserNamespace(uids, gids))
105105
usernsEnabled = true
106+
107+
if err := c.pinUserNamespace(id, nsPath); err != nil {
108+
return nil, fmt.Errorf("failed to pin user namespace: %w", err)
109+
}
110+
specOpts = append(specOpts, customopts.WithNamespacePath(runtimespec.UserNamespace, c.getSandboxPinnedUserNamespace(id)))
106111
default:
107112
return nil, fmt.Errorf("unsupported user namespace mode: %q", mode)
108113
}

internal/cri/server/podsandbox/sandbox_run_linux_test.go

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
package podsandbox
1818

1919
import (
20+
"context"
2021
"os"
2122
"path/filepath"
2223
"strconv"
24+
"syscall"
2325
"testing"
2426

2527
"github.com/moby/sys/userns"
@@ -32,11 +34,15 @@ import (
3234
v1 "k8s.io/cri-api/pkg/apis/runtime/v1"
3335

3436
"github.com/containerd/containerd/v2/internal/cri/annotations"
37+
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
3538
"github.com/containerd/containerd/v2/internal/cri/opts"
39+
"github.com/containerd/containerd/v2/pkg/netns"
3640
ostesting "github.com/containerd/containerd/v2/pkg/os/testing"
41+
"github.com/containerd/containerd/v2/pkg/sys"
42+
"github.com/containerd/containerd/v2/pkg/testutil"
3743
)
3844

39-
func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
45+
func getRunPodSandboxTestData(criCfg criconfig.Config) (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
4046
config := &runtime.PodSandboxConfig{
4147
Metadata: &runtime.PodSandboxMetadata{
4248
Name: "test-name",
@@ -94,7 +100,7 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf
94100
}
95101

96102
assert.Contains(t, spec.Mounts, runtimespec.Mount{
97-
Source: "/test/root/sandboxes/test-id/resolv.conf",
103+
Source: filepath.Join(criCfg.RootDir, "sandboxes/test-id/resolv.conf"),
98104
Destination: resolvConfPath,
99105
Type: "bind",
100106
Options: []string{"rbind", "ro", "nosuid", "nodev", "noexec"},
@@ -105,8 +111,10 @@ func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConf
105111
}
106112

107113
func TestLinuxSandboxContainerSpec(t *testing.T) {
114+
testutil.RequiresRoot(t)
115+
108116
testID := "test-id"
109-
nsPath := "test-cni"
117+
110118
idMap := runtime.IDMapping{
111119
HostId: 1000,
112120
ContainerId: 1000,
@@ -118,15 +126,30 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
118126
Size: 10,
119127
}
120128

129+
netnsBasedir := t.TempDir()
130+
t.Cleanup(func() {
131+
assert.NoError(t, unmountRecursive(context.Background(), netnsBasedir))
132+
})
133+
134+
var netNs *netns.NetNS
135+
uerr := sys.UnshareAfterEnterUserns("1000:1000:10", "1000:1000:10", syscall.CLONE_NEWNET, func(pid int) error {
136+
var err error
137+
netNs, err = netns.NewNetNSFromPID(netnsBasedir, uint32(pid))
138+
return err
139+
})
140+
require.NoError(t, uerr)
141+
142+
nsPath := netNs.GetPath()
143+
121144
for _, test := range []struct {
122145
desc string
123146
configChange func(*runtime.PodSandboxConfig)
124-
specCheck func(*testing.T, *runtimespec.Spec)
147+
specCheck func(*testing.T, *Controller, *runtimespec.Spec)
125148
expectErr bool
126149
}{
127150
{
128151
desc: "spec should reflect original config",
129-
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
152+
specCheck: func(t *testing.T, _ *Controller, spec *runtimespec.Spec) {
130153
// runtime spec should have expected namespaces enabled by default.
131154
require.NotNil(t, spec.Linux)
132155
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
@@ -162,10 +185,11 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
162185
},
163186
}
164187
},
165-
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
188+
specCheck: func(t *testing.T, c *Controller, spec *runtimespec.Spec) {
166189
require.NotNil(t, spec.Linux)
167190
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
168191
Type: runtimespec.UserNamespace,
192+
Path: filepath.Join(c.config.StateDir, "sandboxes", testID, "pinned-namespaces", "user"),
169193
})
170194
assert.NotContains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
171195
},
@@ -181,7 +205,7 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
181205
},
182206
}
183207
},
184-
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
208+
specCheck: func(t *testing.T, _ *Controller, spec *runtimespec.Spec) {
185209
// runtime spec should disable expected namespaces in host mode.
186210
require.NotNil(t, spec.Linux)
187211
assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
@@ -213,10 +237,11 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
213237
},
214238
}
215239
},
216-
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
240+
specCheck: func(t *testing.T, c *Controller, spec *runtimespec.Spec) {
217241
require.NotNil(t, spec.Linux)
218242
assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
219243
Type: runtimespec.UserNamespace,
244+
Path: filepath.Join(c.config.StateDir, "sandboxes", testID, "pinned-namespaces", "user"),
220245
})
221246
require.Equal(t, spec.Linux.UIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
222247
require.Equal(t, spec.Linux.GIDMappings, []runtimespec.LinuxIDMapping{expIDMap})
@@ -314,7 +339,7 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
314339
SupplementalGroups: []int64{1111, 2222},
315340
}
316341
},
317-
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
342+
specCheck: func(t *testing.T, _ *Controller, spec *runtimespec.Spec) {
318343
require.NotNil(t, spec.Process)
319344
assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111))
320345
assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222))
@@ -328,7 +353,7 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
328353
"net.ipv4.ping_group_range": "1 1000",
329354
}
330355
},
331-
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
356+
specCheck: func(t *testing.T, _ *Controller, spec *runtimespec.Spec) {
332357
require.NotNil(t, spec.Process)
333358
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "500")
334359
assert.Contains(t, spec.Linux.Sysctl["net.ipv4.ping_group_range"], "1 1000")
@@ -344,7 +369,7 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
344369
MemoryLimitInBytes: 1024,
345370
}
346371
},
347-
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
372+
specCheck: func(t *testing.T, _ *Controller, spec *runtimespec.Spec) {
348373
value, ok := spec.Annotations[annotations.SandboxCPUPeriod]
349374
assert.True(t, ok)
350375
assert.EqualValues(t, strconv.FormatInt(100, 10), value)
@@ -365,7 +390,7 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
365390
},
366391
{
367392
desc: "sandbox sizing annotations should not be set if LinuxContainerResources were not provided",
368-
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
393+
specCheck: func(t *testing.T, _ *Controller, spec *runtimespec.Spec) {
369394
_, ok := spec.Annotations[annotations.SandboxCPUPeriod]
370395
assert.False(t, ok)
371396
_, ok = spec.Annotations[annotations.SandboxCPUQuota]
@@ -381,7 +406,7 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
381406
configChange: func(c *runtime.PodSandboxConfig) {
382407
c.Linux.Resources = &v1.LinuxContainerResources{}
383408
},
384-
specCheck: func(t *testing.T, spec *runtimespec.Spec) {
409+
specCheck: func(t *testing.T, _ *Controller, spec *runtimespec.Spec) {
385410
value, ok := spec.Annotations[annotations.SandboxCPUPeriod]
386411
assert.True(t, ok)
387412
assert.EqualValues(t, "0", value)
@@ -400,9 +425,17 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
400425
test := test
401426
t.Run(test.desc, func(t *testing.T) {
402427
c := newControllerService()
428+
c.config.RootDir = t.TempDir()
429+
c.config.StateDir = t.TempDir()
430+
431+
defer func() {
432+
assert.NoError(t, unmountRecursive(context.Background(), c.config.StateDir))
433+
}()
434+
403435
c.config.EnableUnprivilegedICMP = true
404436
c.config.EnableUnprivilegedPorts = true
405-
config, imageConfig, specCheck := getRunPodSandboxTestData()
437+
438+
config, imageConfig, specCheck := getRunPodSandboxTestData(c.config)
406439
if test.configChange != nil {
407440
test.configChange(config)
408441
}
@@ -416,7 +449,7 @@ func TestLinuxSandboxContainerSpec(t *testing.T) {
416449
assert.NotNil(t, spec)
417450
specCheck(t, testID, spec)
418451
if test.specCheck != nil {
419-
test.specCheck(t, spec)
452+
test.specCheck(t, c, spec)
420453
}
421454
})
422455
}
@@ -757,6 +790,3 @@ options timeout:1
757790
})
758791
}
759792
}
760-
761-
// TODO(random-liu): [P1] Add unit test for different error cases to make sure
762-
// the function cleans up on error properly.

internal/cri/server/podsandbox/sandbox_run_other_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,13 @@ package podsandbox
2121
import (
2222
"testing"
2323

24+
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
2425
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
2526
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
2627
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2728
)
2829

29-
func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
30+
func getRunPodSandboxTestData(_ criconfig.Config) (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
3031
config := &runtime.PodSandboxConfig{}
3132
imageConfig := &imagespec.ImageConfig{}
3233
specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) {

0 commit comments

Comments
 (0)