Skip to content

Commit

Permalink
cgroup v2: support rootless systemd
Browse files Browse the repository at this point in the history
Tested with both Podman (master) and Moby (master) on Ubuntu 19.10.

$ podman --cgroup-manager=systemd run -it --rm --runtime=runc \
  --cgroupns=host --memory 42m --cpus 0.42 --pids-limit 42 alpine
/ # cat /proc/self/cgroup
0::/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope
/ # cat /sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/memory.max
44040192
/ # cat /sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/cpu.max
42000 100000
/ # cat /sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/pids.max
42

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
  • Loading branch information
AkihiroSuda committed Apr 2, 2020
1 parent e3e26ca commit 159eecf
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 20 deletions.
2 changes: 1 addition & 1 deletion libcontainer/cgroups/systemd/apply_nosystemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func IsRunningSystemd() bool {
return false
}

func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
func NewSystemdCgroupsManager(_ bool) (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
return nil, fmt.Errorf("Systemd not supported")
}

Expand Down
13 changes: 10 additions & 3 deletions libcontainer/cgroups/systemd/apply_systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,22 +103,29 @@ func IsRunningSystemd() bool {
func getDbusConnection() (*systemdDbus.Conn, error) {
connOnce.Do(func() {
connDbus, connErr = systemdDbus.New()
if connErr != nil {
connDbus, connErr = NewUserSystemdDbus()
}
})
return connDbus, connErr
}

func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
func NewSystemdCgroupsManager(rootless bool) (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
if !IsRunningSystemd() {
return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager")
}
if cgroups.IsCgroup2UnifiedMode() {
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &UnifiedManager{
Cgroups: config,
Paths: paths,
Cgroups: config,
Paths: paths,
Rootless: rootless,
}
}, nil
}
if rootless {
return nil, fmt.Errorf("cgroup v1 doesn't support rootless")
}
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &LegacyManager{
Cgroups: config,
Expand Down
45 changes: 34 additions & 11 deletions libcontainer/cgroups/systemd/unified_hierarchy.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"math"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
Expand All @@ -21,9 +22,10 @@ import (
)

type UnifiedManager struct {
mu sync.Mutex
Cgroups *configs.Cgroup
Paths map[string]string
mu sync.Mutex
Cgroups *configs.Cgroup
Paths map[string]string
Rootless bool
}

func (m *UnifiedManager) Apply(pid int) error {
Expand All @@ -34,6 +36,10 @@ func (m *UnifiedManager) Apply(pid int) error {
properties []systemdDbus.Property
)

if m.Rootless {
slice = "user.slice"
}

if c.Paths != nil {
paths := make(map[string]string)
for name, path := range c.Paths {
Expand Down Expand Up @@ -132,15 +138,15 @@ func (m *UnifiedManager) Apply(pid int) error {
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
}
} else if !isUnitExists(err) {
return err
return errors.Wrapf(err, "error while starting a transient unit slice=%q, unitName=%q, properties=%+v", slice, unitName, properties)
}

path, err := getv2Path(m.Cgroups)
path, err := getv2Path(m.Cgroups, m.Rootless, dbusConnection)
if err != nil {
return err
}
if err := createCgroupsv2Path(path); err != nil {
return err
if err := createCgroupsv2Path(path, m.Rootless); err != nil {
return errors.Wrapf(err, "error while creating cgroup v2 path %q", path)
}
m.Paths = map[string]string{
"pids": path,
Expand Down Expand Up @@ -198,8 +204,11 @@ func (m *UnifiedManager) GetUnifiedPath() (string, error) {
return unifiedPath, nil
}

func getv2Path(c *configs.Cgroup) (string, error) {
func getv2Path(c *configs.Cgroup, rootless bool, conn *systemdDbus.Conn) (string, error) {
slice := "system.slice"
if rootless {
slice = "user.slice"
}
if c.Parent != "" {
slice = c.Parent
}
Expand All @@ -209,10 +218,24 @@ func getv2Path(c *configs.Cgroup) (string, error) {
return "", err
}

if rootless {
// managerCGQuoted is typically "/user.slice/user-${uid}.slice/user@${uid}.service" including the quote symbols
managerCGQuoted, err := conn.GetManagerProperty("ControlGroup")
if err != nil {
return "", err
}
managerCG, err := strconv.Unquote(managerCGQuoted)
if err != nil {
return "", err
}
slice = filepath.Join(managerCG, slice)
}

// an example of the final path: "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
return filepath.Join(fs2.UnifiedMountpoint, slice, getUnitName(c)), nil
}

func createCgroupsv2Path(path string) (Err error) {
func createCgroupsv2Path(path string, rootless bool) (Err error) {
content, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
if err != nil {
return err
Expand Down Expand Up @@ -240,7 +263,7 @@ func createCgroupsv2Path(path string) (Err error) {
}
}
if i < len(elements[3:])-1 {
if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), res, 0755); err != nil {
if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), res, 0755); err != nil && !rootless {
return err
}
}
Expand All @@ -253,7 +276,7 @@ func (m *UnifiedManager) fsManager() (cgroups.Manager, error) {
if err != nil {
return nil, err
}
return fs2.NewManager(m.Cgroups, path, false)
return fs2.NewManager(m.Cgroups, path, m.Rootless)
}

func (m *UnifiedManager) Freeze(state configs.FreezerState) error {
Expand Down
103 changes: 103 additions & 0 deletions libcontainer/cgroups/systemd/user.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// +build linux

package systemd

import (
"bufio"
"bytes"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"

systemdDbus "github.com/coreos/go-systemd/v22/dbus"
dbus "github.com/godbus/dbus/v5"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/pkg/errors"
)

// NewUserSystemdDbus creates a connection to the systemd user instance.
//
// The bus address is taken from DetectUserDbusSessionBusAddress and the UID
// used for authentication from DetectUID; an error from either is returned
// unchanged. The resulting dial function is handed to
// systemdDbus.NewConnection.
func NewUserSystemdDbus() (*systemdDbus.Conn, error) {
	busAddr, err := DetectUserDbusSessionBusAddress()
	if err != nil {
		return nil, err
	}
	ownerUID, err := DetectUID()
	if err != nil {
		return nil, err
	}

	// dial opens the raw D-Bus connection: dial the address, authenticate
	// via dbus.AuthExternal with the detected UID, then send Hello. The
	// connection is closed again if either step after dialing fails.
	dial := func() (*dbus.Conn, error) {
		c, dialErr := dbus.Dial(busAddr)
		if dialErr != nil {
			return nil, dialErr
		}
		authMethods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(ownerUID))}
		if authErr := c.Auth(authMethods); authErr != nil {
			c.Close()
			return nil, authErr
		}
		if helloErr := c.Hello(); helloErr != nil {
			c.Close()
			return nil, helloErr
		}
		return c, nil
	}
	return systemdDbus.NewConnection(dial)
}

// DetectUID detects the UID to use when authenticating to the user D-Bus.
//
// When not running in a user namespace it returns os.Getuid().
//
// When running in a user namespace it runs `busctl --user --no-pager status`
// and parses the value of the "OwnerUID=" line from its output. That value
// corresponds to sd_bus_creds_get_owner_uid(3).
//
// On any failure (busctl cannot be executed, the OwnerUID value is not an
// integer, or no OwnerUID line is present) it returns -1 and a non-nil error.
func DetectUID() (int, error) {
	if !system.RunningInUserNS() {
		return os.Getuid(), nil
	}
	b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
	if err != nil {
		return -1, errors.Wrap(err, "could not execute `busctl --user --no-pager status`")
	}
	scanner := bufio.NewScanner(bytes.NewReader(b))
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if strings.HasPrefix(s, "OwnerUID=") {
			uidStr := strings.TrimPrefix(s, "OwnerUID=")
			i, err := strconv.Atoi(uidStr)
			if err != nil {
				return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s)
			}
			return i, nil
		}
	}
	if err := scanner.Err(); err != nil {
		return -1, errors.Wrap(err, "could not read the output of `busctl --user --no-pager status`")
	}
	// No OwnerUID line was found. Do not fall back to 0: that would report
	// root's UID as a successful detection.
	return -1, errors.New("could not detect the OwnerUID")
}

// DetectUserDbusSessionBusAddress determines the address of the user's D-Bus
// session bus. The following sources are tried in order:
//
//  1. $DBUS_SESSION_BUS_ADDRESS, if it is set to a non-empty value.
//  2. "unix:path=$XDG_RUNTIME_DIR/bus", if $XDG_RUNTIME_DIR is non-empty and
//     os.Stat succeeds on $XDG_RUNTIME_DIR/bus.
//  3. The DBUS_SESSION_BUS_ADDRESS entry in the output of
//     `systemctl --user --no-pager show-environment`.
//
// An error is returned if systemctl cannot be executed or its output has no
// such entry.
func DetectUserDbusSessionBusAddress() (string, error) {
	const (
		envName   = "DBUS_SESSION_BUS_ADDRESS"
		envPrefix = envName + "="
	)

	if addr := os.Getenv(envName); addr != "" {
		return addr, nil
	}

	runtimeDir := os.Getenv("XDG_RUNTIME_DIR")
	if runtimeDir != "" {
		sock := filepath.Join(runtimeDir, "bus")
		if _, statErr := os.Stat(sock); statErr == nil {
			return "unix:path=" + sock, nil
		}
	}

	out, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
	if err != nil {
		return "", errors.Wrap(err, "could not execute `systemctl --user --no-pager show-environment`")
	}

	// Take the first line that carries the variable.
	lines := bufio.NewScanner(bytes.NewReader(out))
	for lines.Scan() {
		line := strings.TrimSpace(lines.Text())
		if !strings.HasPrefix(line, envPrefix) {
			continue
		}
		return strings.TrimPrefix(line, envPrefix), nil
	}
	return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from `systemctl --user --no-pager show-environment`")
}
13 changes: 12 additions & 1 deletion libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,18 @@ func InitArgs(args ...string) func(*LinuxFactory) error {
// SystemdCgroups is an options func to configure a LinuxFactory to return
// containers that use systemd to create and manage cgroups.
func SystemdCgroups(l *LinuxFactory) error {
systemdCgroupsManager, err := systemd.NewSystemdCgroupsManager()
systemdCgroupsManager, err := systemd.NewSystemdCgroupsManager(false)
if err != nil {
return err
}
l.NewCgroupsManager = systemdCgroupsManager
return nil
}

// RootlessSystemdCgroups is an options func to configure a LinuxFactory to return
// containers that use systemd to create and manage cgroups in rootless mode.
func RootlessSystemdCgroups(l *LinuxFactory) error {
systemdCgroupsManager, err := systemd.NewSystemdCgroupsManager(true)
if err != nil {
return err
}
Expand Down
4 changes: 0 additions & 4 deletions rootless_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ func shouldUseRootlessCgroupManager(context *cli.Context) (bool, error) {
if b != nil {
return *b, nil
}

if context.GlobalBool("systemd-cgroup") {
return false, nil
}
}
if os.Geteuid() != 0 {
return true, nil
Expand Down
3 changes: 3 additions & 0 deletions utils_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
if context.GlobalBool("systemd-cgroup") {
if systemd.IsRunningSystemd() {
cgroupManager = libcontainer.SystemdCgroups
if rootlessCg {
cgroupManager = libcontainer.RootlessSystemdCgroups
}
} else {
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
}
Expand Down

0 comments on commit 159eecf

Please sign in to comment.