Add systemd cgroup controller support #5462

Merged 2 commits on Apr 29, 2022
7 changes: 5 additions & 2 deletions pkg/agent/containerd/config_linux.go
@@ -45,18 +45,21 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
}

isRunningInUserNS := userns.RunningInUserNS()
_, _, hasCFS, hasPIDs := cgroups.CheckCgroups()
_, _, controllers := cgroups.CheckCgroups()
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
disableCgroup := isRunningInUserNS && (!controllers["cpu"] || !controllers["pids"] || !cgroupfsWritable)
if disableCgroup {
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
} else {
cfg.AgentConfig.Systemd = controllers["cpuset"] && os.Getenv("NOTIFY_SOCKET") != ""
}

var containerdTemplate string
containerdConfig := templates.ContainerdConfig{
NodeConfig: cfg,
DisableCgroup: disableCgroup,
SystemdCgroup: cfg.AgentConfig.Systemd,
IsRunningInUserNS: isRunningInUserNS,
PrivateRegistryConfig: privRegistries.Registry,
ExtraRuntimes: findNvidiaContainerRuntimes(os.DirFS(string(os.PathSeparator))),
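For context, here is a minimal standalone sketch (not the PR's code verbatim) of the decision made above: the systemd cgroup driver is only requested when cgroups are not being disabled, the cpuset controller is available, and the process was started as a systemd service (systemd sets NOTIFY_SOCKET for Type=notify units). The controllers map is assumed to have the shape returned by cgroups.CheckCgroups().

```go
package main

import (
	"fmt"
	"os"
)

// useSystemdCgroup mirrors the check added in setupContainerdConfig: the
// cpuset controller must be usable and the agent must be running under a
// systemd unit (NOTIFY_SOCKET set) for the systemd cgroup driver to be used.
func useSystemdCgroup(controllers map[string]bool, disableCgroup bool) bool {
	if disableCgroup {
		return false
	}
	return controllers["cpuset"] && os.Getenv("NOTIFY_SOCKET") != ""
}

func main() {
	// Hypothetical controller set for illustration only.
	controllers := map[string]bool{"cpu": true, "pids": true, "cpuset": true}
	fmt.Println(useSystemdCgroup(controllers, false)) // true only when NOTIFY_SOCKET is set
}
```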
1 change: 1 addition & 0 deletions pkg/agent/containerd/config_windows.go
@@ -45,6 +45,7 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
containerdConfig := templates.ContainerdConfig{
NodeConfig: cfg,
DisableCgroup: true,
SystemdCgroup: false,
IsRunningInUserNS: false,
PrivateRegistryConfig: privRegistries.Registry,
}
1 change: 1 addition & 0 deletions pkg/agent/templates/templates.go
@@ -14,6 +14,7 @@ type ContainerdRuntimeConfig struct {
type ContainerdConfig struct {
NodeConfig *config.Node
DisableCgroup bool
SystemdCgroup bool
IsRunningInUserNS bool
PrivateRegistryConfig *registries.Registry
ExtraRuntimes map[string]ContainerdRuntimeConfig
3 changes: 3 additions & 0 deletions pkg/agent/templates/templates_linux.go
@@ -81,6 +81,9 @@ enable_keychain = true
[plugins.cri.containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"

[plugins.cri.containerd.runtimes.runc.options]
SystemdCgroup = {{ .SystemdCgroup }}

{{ if .PrivateRegistryConfig }}
{{ if .PrivateRegistryConfig.Mirrors }}
[plugins.cri.registry.mirrors]{{end}}
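To illustrate how the new field flows into the generated containerd configuration, here is a small self-contained sketch that renders just the runc options stanza (a trimmed-down fragment of the template above) with Go's text/template, the same mechanism the agent uses for the full config:

```go
package main

import (
	"os"
	"text/template"
)

// A trimmed-down fragment of the containerd config template shown above.
const runcOptions = `[plugins.cri.containerd.runtimes.runc]
  runtime_type = "io.containerd.runc.v2"

[plugins.cri.containerd.runtimes.runc.options]
  SystemdCgroup = {{ .SystemdCgroup }}
`

func main() {
	tmpl := template.Must(template.New("runc").Parse(runcOptions))
	// With SystemdCgroup=true the rendered output ends with:
	//   [plugins.cri.containerd.runtimes.runc.options]
	//     SystemdCgroup = true
	if err := tmpl.Execute(os.Stdout, struct{ SystemdCgroup bool }{SystemdCgroup: true}); err != nil {
		panic(err)
	}
}
```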
36 changes: 16 additions & 20 deletions pkg/cgroups/cgroups_linux.go
@@ -65,34 +65,30 @@ func validateCgroupsV2() error {
return nil
}

func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
func CheckCgroups() (kubeletRoot, runtimeRoot string, controllers map[string]bool) {
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
controllers = make(map[string]bool)

// For Unified (v2) cgroups we can directly check to see what controllers are mounted
// under the unified hierarchy.
if cgroupsModeV2 {
m, err := cgroupsv2.LoadManager("/sys/fs/cgroup", "/")
if err != nil {
return "", "", false, false
return
}
controllers, err := m.Controllers()
enabledControllers, err := m.Controllers()
if err != nil {
return "", "", false, false
return
}
// Intentionally using an expressionless switch to match the logic below
for _, controller := range controllers {
switch {
case controller == "cpu":
hasCFS = true
case controller == "pids":
hasPIDs = true
}
for _, controller := range enabledControllers {
controllers[controller] = true
}
}

f, err := os.Open("/proc/self/cgroup")
if err != nil {
return "", "", false, false
return
}
defer f.Close()

@@ -102,10 +98,10 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
if len(parts) < 3 {
continue
}
controllers := strings.Split(parts[1], ",")
enabledControllers := strings.Split(parts[1], ",")
// For v1 or hybrid, controller can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}
// For v2, controllers = {""} (only contains a single empty string)
for _, controller := range controllers {
// For v2, controllers = {""} (only contains a single empty string) so this section is not used.
for _, controller := range enabledControllers {
switch {
case controller == "name=systemd" || cgroupsModeV2:
// If we detect that we are running under a `.scope` unit with systemd
@@ -128,10 +124,10 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
// can fail if we use the comma-separated name. Instead, we check for the controller using the symlink.
p := filepath.Join("/sys/fs/cgroup", controller, parts[2], "cpu.cfs_period_us")
if _, err := os.Stat(p); err == nil {
hasCFS = true
controllers[controller] = true
}
case controller == "pids":
hasPIDs = true
default:
controllers[controller] = true
}
}
}
@@ -146,7 +142,7 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
// a host PID scenario but we don't support this.
g, err := os.Open("/proc/1/cgroup")
if err != nil {
return "", "", false, false
return
}
defer g.Close()
scan = bufio.NewScanner(g)
@@ -170,5 +166,5 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
}
}
}
return kubeletRoot, runtimeRoot, hasCFS, hasPIDs
return
}
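As a rough illustration of what the returned controllers map contains on a cgroup v2 (unified) host, the enabled controllers can also be read directly from /sys/fs/cgroup/cgroup.controllers. This sketch is a simplification for illustration only, not the function above, which additionally handles v1/hybrid layouts and derives the kubelet and runtime cgroup roots:

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// v2Controllers builds a controllers map from the unified hierarchy's
// cgroup.controllers file, whose content looks like
// "cpuset cpu io memory hugetlb pids".
func v2Controllers() (map[string]bool, error) {
	data, err := os.ReadFile("/sys/fs/cgroup/cgroup.controllers")
	if err != nil {
		return nil, err
	}
	controllers := make(map[string]bool)
	for _, c := range strings.Fields(string(data)) {
		controllers[c] = true
	}
	return controllers, nil
}

func main() {
	controllers, err := v2Controllers()
	if err != nil {
		fmt.Println("cgroup v2 unified hierarchy not detected:", err)
		return
	}
	// On a typical systemd host this prints something like:
	//   map[cpu:true cpuset:true io:true memory:true pids:true ...]
	fmt.Println(controllers)
}
```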
5 changes: 3 additions & 2 deletions pkg/cgroups/cgroups_windows.go
@@ -1,3 +1,4 @@
//go:build windows
// +build windows

package cgroups
@@ -6,6 +7,6 @@ func Validate() error {
return nil
}

func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
return "", "", false, false
func CheckCgroups() (kubeletRoot, runtimeRoot string, controllers map[string]bool) {
return
}
41 changes: 21 additions & 20 deletions pkg/daemons/agent/agent_linux.go
@@ -18,17 +18,15 @@ import (
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
)

func createRootlessConfig(argsMap map[string]string, hasCFS, hasPIDs bool) {
func createRootlessConfig(argsMap map[string]string, controllers map[string]bool) {
argsMap["feature-gates=KubeletInUserNamespace"] = "true"
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
if hasCFS && hasPIDs && cgroupfsWritable {
if controllers["cpu"] && controllers["pids"] && cgroupfsWritable {
logrus.Info("cgroup v2 controllers are delegated for rootless.")
// cgroupfs v2, delegated for rootless by systemd
argsMap["cgroup-driver"] = "cgroupfs"
} else {
logrus.Fatal("delegated cgroup v2 controllers are required for rootless.")
return
}
logrus.Fatal("delegated cgroup v2 controllers are required for rootless.")
}

func checkRuntimeEndpoint(cfg *config.Agent, argsMap map[string]string) {
@@ -67,14 +65,13 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
bindAddress = "::1"
}
argsMap := map[string]string{
"healthz-bind-address": bindAddress,
"read-only-port": "0",
"cluster-domain": cfg.ClusterDomain,
"kubeconfig": cfg.KubeConfigKubelet,
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
"fail-swap-on": "false",
//"cgroup-root": "/k3s",
"healthz-bind-address": bindAddress,
"read-only-port": "0",
"cluster-domain": cfg.ClusterDomain,
"kubeconfig": cfg.KubeConfigKubelet,
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
"fail-swap-on": "false",
"cgroup-driver": "cgroupfs",
"authentication-token-webhook": "true",
"anonymous-auth": "false",
@@ -138,13 +135,13 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
if err != nil || defaultIP.String() != cfg.NodeIP {
argsMap["node-ip"] = cfg.NodeIP
}
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := cgroups.CheckCgroups()
if !hasCFS {
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
kubeletRoot, runtimeRoot, controllers := cgroups.CheckCgroups()
if !controllers["cpu"] {
logrus.Warn("Disabling CPU quotas due to missing cpu controller or cpu.cfs_period_us")
argsMap["cpu-cfs-quota"] = "false"
}
if !hasPIDs {
logrus.Fatal("PIDS cgroup support not found")
if !controllers["pids"] {
logrus.Fatal("pids cgroup controller not found")
}
if kubeletRoot != "" {
argsMap["kubelet-cgroups"] = kubeletRoot
@@ -172,7 +169,7 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
}

if cfg.Rootless {
createRootlessConfig(argsMap, hasCFS, hasCFS)
createRootlessConfig(argsMap, controllers)
}

if cfg.Systemd {
argsMap["cgroup-driver"] = "systemd"
}

if cfg.ProtectKernelDefaults {
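A minimal sketch of the resulting kubelet flag selection, assuming cfg.Systemd was populated by the containerd setup shown earlier: the default driver stays cgroupfs and is only switched to systemd when the agent enabled the systemd cgroup driver for containerd's runc runtime, keeping the kubelet and the container runtime on the same driver.

```go
package main

import "fmt"

// cgroupDriver picks the kubelet cgroup driver so that it matches the one
// configured for containerd's runc runtime.
func cgroupDriver(systemd bool) string {
	if systemd {
		return "systemd"
	}
	return "cgroupfs"
}

func main() {
	// e.g. --cgroup-driver=systemd when running as a systemd Type=notify unit
	fmt.Printf("--cgroup-driver=%s\n", cgroupDriver(true))
}
```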
1 change: 1 addition & 0 deletions pkg/daemons/config/types.go
@@ -90,6 +90,7 @@ type Agent struct {
ExtraKubeProxyArgs []string
PauseImage string
Snapshotter string
Systemd bool
CNIPlugin bool
NodeTaints []string
NodeLabels []string