Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/buildkitd/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ type OCIConfig struct {
ProxySnapshotterPath string `toml:"proxySnapshotterPath"`
DefaultCgroupParent string `toml:"defaultCgroupParent"`

// IsolateCgroups keeps all cgroups (including DefaultCgroupParent) under
// the cgroup hierarchy of the buildkitd process.
IsolateCgroups bool `toml:"isolateCgroups"`

// StargzSnapshotterConfig is configuration for stargz snapshotter.
// We use a generic map[string]interface{} in order to remove the dependency
// on stargz snapshotter's config pkg from our config.
Expand Down
9 changes: 8 additions & 1 deletion cmd/buildkitd/main_oci_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ func init() {
Usage: "limit the number of parallel build steps that can run at the same time",
Value: defaultConf.Workers.OCI.MaxParallelism,
},
cli.BoolFlag{
Name: "oci-isolate-cgroups",
Usage: "isolate cgroups to the cgroup hierarchy of the buildkitd process",
},
}
n := "oci-worker-rootless"
u := "enable rootless mode"
Expand Down Expand Up @@ -260,6 +264,9 @@ func applyOCIFlags(c *cli.Context, cfg *config.Config) error {
if c.GlobalIsSet("oci-max-parallelism") {
cfg.Workers.OCI.MaxParallelism = c.GlobalInt("oci-max-parallelism")
}
if c.GlobalIsSet("oci-isolate-cgroups") {
cfg.Workers.OCI.IsolateCgroups = c.GlobalBool("oci-isolate-cgroups")
}

return nil
}
Expand Down Expand Up @@ -327,7 +334,7 @@ func ociWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([]worker
parallelismSem = semaphore.NewWeighted(int64(cfg.MaxParallelism))
}

opt, err := runc.NewWorkerOpt(common.config.Root, snFactory, cfg.Rootless, processMode, cfg.Labels, idmapping, nc, dns, cfg.Binary, cfg.ApparmorProfile, cfg.SELinux, parallelismSem, common.traceSocket, cfg.DefaultCgroupParent, cdiManager)
opt, err := runc.NewWorkerOpt(common.config.Root, snFactory, cfg.Rootless, processMode, cfg.Labels, idmapping, nc, dns, cfg.Binary, cfg.ApparmorProfile, cfg.SELinux, parallelismSem, common.traceSocket, cfg.DefaultCgroupParent, cfg.IsolateCgroups, cdiManager)
if err != nil {
return nil, err
}
Expand Down
6 changes: 6 additions & 0 deletions docs/buildkitd.toml.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ provenanceEnvDir = "/etc/buildkit/provenance.d"
# maintain a pool of reusable CNI network namespaces to amortize the overhead
# of allocating and releasing the namespaces
cniPoolSize = 16
# defaultCgroupParent sets the parent cgroup of all containers.
defaultCgroupParent = "buildkit"
# isolateCgroups keeps all buildkitd-managed cgroups under the cgroup
# hierarchy of the buildkitd process. If you are running buildkitd in
# Kubernetes, set this to true to ensure resource limits work as expected.
isolateCgroups = true

[worker.oci.labels]
"foo" = "bar"
Expand Down
2 changes: 1 addition & 1 deletion executor/containerdexecutor/executor_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func (w *containerdExecutor) createOCISpec(ctx context.Context, id, resolvConf,
}

processMode := oci.ProcessSandbox // FIXME(AkihiroSuda)
spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, resolvConf, hostsFile, namespace, w.cgroupParent, processMode, nil, w.apparmorProfile, w.selinux, w.traceSocket, w.cdiManager, opts...)
spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, resolvConf, hostsFile, namespace, w.cgroupParent, "", processMode, nil, w.apparmorProfile, w.selinux, w.traceSocket, w.cdiManager, opts...)
if err != nil {
releaseAll()
return nil, nil, err
Expand Down
2 changes: 1 addition & 1 deletion executor/containerdexecutor/executor_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (w *containerdExecutor) createOCISpec(ctx context.Context, id, _, _ string,
}

processMode := oci.ProcessSandbox // FIXME(AkihiroSuda)
spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, "", "", namespace, "", processMode, nil, "", false, w.traceSocket, nil, opts...)
spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, "", "", namespace, "", "", processMode, nil, "", false, w.traceSocket, nil, opts...)
if err != nil {
releaseAll()
return nil, nil, err
Expand Down
13 changes: 9 additions & 4 deletions executor/oci/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ const (
// Note that NoProcessSandbox allows build containers to kill (and potentially ptrace) an arbitrary process in the BuildKit host namespace.
// NoProcessSandbox should be enabled only when the BuildKit is running in a container as an unprivileged user.
NoProcessSandbox

// cgroupNamespace is the cgroup under which container cgroups are created.
cgroupNamespace = "buildkit"
)

var tracingEnvVars = []string{
Expand All @@ -61,7 +64,7 @@ func (pm ProcessMode) String() string {

// GenerateSpec generates spec using containerd functionality.
// opts are ignored for s.Process, s.Hostname, and s.Mounts .
func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mount, id, resolvConf, hostsFile string, namespace network.Namespace, cgroupParent string, processMode ProcessMode, idmap *user.IdentityMapping, apparmorProfile string, selinuxB bool, tracingSocket string, cdiManager *cdidevices.Manager, opts ...oci.SpecOpts) (*specs.Spec, func(), error) {
func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mount, id, resolvConf, hostsFile string, namespace network.Namespace, cgroupParent string, cgroupRoot string, processMode ProcessMode, idmap *user.IdentityMapping, apparmorProfile string, selinuxB bool, tracingSocket string, cdiManager *cdidevices.Manager, opts ...oci.SpecOpts) (*specs.Spec, func(), error) {
c := &containers.Container{
ID: id,
}
Expand All @@ -75,15 +78,17 @@ func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mou
if strings.Contains(cgroupParent, ".slice") && lastSeparator == ":" {
cgroupsPath = cgroupParent + id
} else {
cgroupsPath = filepath.Join("/", cgroupParent, "buildkit", id)
cgroupsPath = filepath.Join(cgroupParent, cgroupNamespace, id)
}
opts = append(opts, oci.WithCgroup(cgroupsPath))
opts = append(opts, oci.WithCgroup(filepath.Join("/", cgroupRoot, cgroupsPath)))
} else if cgroupRoot != "" {
opts = append(opts, oci.WithCgroup(filepath.Join("/", cgroupRoot, cgroupNamespace, id)))
}

// containerd/oci.GenerateSpec requires a namespace, which
// will be used to namespace specs.Linux.CgroupsPath if generated
if _, ok := namespaces.Namespace(ctx); !ok {
ctx = namespaces.WithNamespace(ctx, "buildkit")
ctx = namespaces.WithNamespace(ctx, cgroupNamespace)
}

opts = append(opts, generateMountOpts(resolvConf, hostsFile)...)
Expand Down
72 changes: 51 additions & 21 deletions executor/resources/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,11 @@ func (r *nopRecord) Start() {
}

type Monitor struct {
mu sync.Mutex
closed chan struct{}
records map[string]*cgroupRecord
proc procfs.FS
mu sync.Mutex
closed chan struct{}
records map[string]*cgroupRecord
proc procfs.FS
rootCgroup string
}

type NetworkSampler interface {
Expand Down Expand Up @@ -221,63 +222,92 @@ func (m *Monitor) Close() error {
return nil
}

func NewMonitor() (*Monitor, error) {
// RootCgroup returns the root cgroup path stored on the Monitor by NewMonitor.
// The value is empty unless NewMonitor was called with isolateCgroups set, the
// host uses cgroup v2, and the current process's cgroup path could be read
// during one-time setup.
// NOTE(review): that path is only resolved inside initOnce.Do, so a Monitor
// built by a later NewMonitor call appears to always report "" — confirm.
func (m *Monitor) RootCgroup() string {
return m.rootCgroup
}

func NewMonitor(isolateCgroups bool) (*Monitor, error) {
fs, err := procfs.NewDefaultFS()
if err != nil {
return nil, err
}

rootCgroup := ""

initOnce.Do(func() {
isCgroupV2 = isCgroup2()
if !isCgroupV2 {
return
}
if err := prepareCgroupControllers(); err != nil {

cgroupPath := defaultMountpoint

if isolateCgroups {
proc, err := fs.Self()
if err != nil {
bklog.L.Warnf("failed to get current process info: %+v", err)
return
}

cgroups, err := proc.Cgroups()
if err != nil {
bklog.L.Warnf("failed to get current cgroups: %+v", err)
return
}

if len(cgroups) > 0 {
rootCgroup = cgroups[0].Path
cgroupPath = filepath.Join(cgroupPath, cgroups[0].Path)
}
}

if err := prepareCgroupControllers(cgroupPath); err != nil {
bklog.L.Warnf("failed to prepare cgroup controllers: %+v", err)
}
})

fs, err := procfs.NewDefaultFS()
if err != nil {
return nil, err
}

return &Monitor{
closed: make(chan struct{}),
records: make(map[string]*cgroupRecord),
proc: fs,
rootCgroup: rootCgroup,
closed: make(chan struct{}),
records: make(map[string]*cgroupRecord),
proc: fs,
}, nil
}

func prepareCgroupControllers() error {
func prepareCgroupControllers(cgroupPath string) error {
v, ok := os.LookupEnv("BUILDKIT_SETUP_CGROUPV2_ROOT")
if !ok {
return nil
}
if b, _ := strconv.ParseBool(v); !b {
return nil
}
// move current process to init cgroup
if err := os.MkdirAll(filepath.Join(defaultMountpoint, initGroup), 0755); err != nil {
// move current process to an init cgroup subgroup
if err := os.MkdirAll(filepath.Join(cgroupPath, initGroup), 0755); err != nil {
return err
}
f, err := os.OpenFile(filepath.Join(defaultMountpoint, cgroupProcsFile), os.O_RDONLY, 0)
f, err := os.OpenFile(filepath.Join(cgroupPath, cgroupProcsFile), os.O_RDONLY, 0)
if err != nil {
return err
}
s := bufio.NewScanner(f)
for s.Scan() {
if err := os.WriteFile(filepath.Join(defaultMountpoint, initGroup, cgroupProcsFile), s.Bytes(), 0); err != nil {
if err := os.WriteFile(filepath.Join(cgroupPath, initGroup, cgroupProcsFile), s.Bytes(), 0); err != nil {
return err
}
}
if err := f.Close(); err != nil {
return err
}
dt, err := os.ReadFile(filepath.Join(defaultMountpoint, cgroupControllersFile))
dt, err := os.ReadFile(filepath.Join(cgroupPath, cgroupControllersFile))
if err != nil {
return err
}
for c := range strings.SplitSeq(string(dt), " ") {
if c == "" {
continue
}
if err := os.WriteFile(filepath.Join(defaultMountpoint, cgroupSubtreeFile), []byte("+"+c), 0); err != nil {
if err := os.WriteFile(filepath.Join(cgroupPath, cgroupSubtreeFile), []byte("+"+c), 0); err != nil {
// ignore error
bklog.L.Warnf("failed to enable cgroup controller %q: %+v", c, err)
}
Expand Down
9 changes: 7 additions & 2 deletions executor/runcexecutor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,11 @@ type Opt struct {
CommandCandidates []string
// without root privileges (has nothing to do with Opt.Root directory)
Rootless bool
// DefaultCgroupParent is the cgroup-parent name for executor
// DefaultCgroupParent is the default cgroup-parent name for executor
DefaultCgroupParent string
// RootCgroup ensures all cgroups (including DefaultCgroupParent) are
// created beneath the given root cgroup.
RootCgroup string
// ProcessMode
ProcessMode oci.ProcessMode
IdentityMapping *user.IdentityMapping
Expand All @@ -67,6 +70,7 @@ type runcExecutor struct {
runc *runc.Runc
root string
cgroupParent string
rootCgroup string
rootless bool
networkProviders map[pb.NetMode]network.Provider
processMode oci.ProcessMode
Expand Down Expand Up @@ -135,6 +139,7 @@ func New(opt Opt, networkProviders map[pb.NetMode]network.Provider) (executor.Ex
runc: runtime,
root: root,
cgroupParent: opt.DefaultCgroupParent,
rootCgroup: opt.RootCgroup,
rootless: opt.Rootless,
networkProviders: networkProviders,
processMode: opt.ProcessMode,
Expand Down Expand Up @@ -268,7 +273,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
}
}

spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, resolvConf, hostsFile, namespace, w.cgroupParent, w.processMode, w.idmap, w.apparmorProfile, w.selinux, w.tracingSocket, w.cdiManager, opts...)
spec, cleanup, err := oci.GenerateSpec(ctx, meta, mounts, id, resolvConf, hostsFile, namespace, w.cgroupParent, w.rootCgroup, w.processMode, w.idmap, w.apparmorProfile, w.selinux, w.tracingSocket, w.cdiManager, opts...)
if err != nil {
return nil, err
}
Expand Down
5 changes: 3 additions & 2 deletions worker/runc/runc.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ type SnapshotterFactory struct {
}

// NewWorkerOpt creates a WorkerOpt.
func NewWorkerOpt(root string, snFactory SnapshotterFactory, rootless bool, processMode oci.ProcessMode, labels map[string]string, idmap *user.IdentityMapping, nopt netproviders.Opt, dns *oci.DNSConfig, binary, apparmorProfile string, selinux bool, parallelismSem *semaphore.Weighted, traceSocket, defaultCgroupParent string, cdiManager *cdidevices.Manager) (base.WorkerOpt, error) {
func NewWorkerOpt(root string, snFactory SnapshotterFactory, rootless bool, processMode oci.ProcessMode, labels map[string]string, idmap *user.IdentityMapping, nopt netproviders.Opt, dns *oci.DNSConfig, binary, apparmorProfile string, selinux bool, parallelismSem *semaphore.Weighted, traceSocket, defaultCgroupParent string, isolateCgroups bool, cdiManager *cdidevices.Manager) (base.WorkerOpt, error) {
var opt base.WorkerOpt
name := "runc-" + snFactory.Name
root = filepath.Join(root, name)
Expand All @@ -59,7 +59,7 @@ func NewWorkerOpt(root string, snFactory SnapshotterFactory, rootless bool, proc
cmds = append(cmds, binary)
}

rm, err := resources.NewMonitor()
rm, err := resources.NewMonitor(isolateCgroups)
if err != nil {
return opt, err
}
Expand All @@ -79,6 +79,7 @@ func NewWorkerOpt(root string, snFactory SnapshotterFactory, rootless bool, proc
SELinux: selinux,
TracingSocket: traceSocket,
DefaultCgroupParent: defaultCgroupParent,
RootCgroup: rm.RootCgroup(),
ResourceMonitor: rm,
CDIManager: cdiManager,
}, np)
Expand Down
2 changes: 1 addition & 1 deletion worker/runc/runc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func newWorkerOpt(t *testing.T, processMode oci.ProcessMode) base.WorkerOpt {
},
}
rootless := false
workerOpt, err := NewWorkerOpt(tmpdir, snFactory, rootless, processMode, nil, nil, netproviders.Opt{Mode: "host"}, nil, "", "", false, nil, "", "", nil)
workerOpt, err := NewWorkerOpt(tmpdir, snFactory, rootless, processMode, nil, nil, netproviders.Opt{Mode: "host"}, nil, "", "", false, nil, "", "", false, nil)
require.NoError(t, err)

return workerOpt
Expand Down
Loading