Skip to content

Commit

Permalink
libcontainer: add support for Intel RDT/CAT in runc
Browse files Browse the repository at this point in the history
This PR fixes issue opencontainers#433
opencontainers#433

About Intel RDT/CAT feature:
Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
Cache is the only resource that is supported in RDT.

This feature provides a way for the software to restrict cache allocation to a
defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
The different subsets are identified by class of service (CLOS) and each CLOS
has a capacity bitmask (CBM).

More information about Intel RDT/CAT can be found in section 17.17
of the Intel Software Developer Manual and in the kernel document:
https://lkml.org/lkml/2016/7/12/747

About Intel RDT/CAT kernel interface:
In Linux kernel, the interface is defined and exposed via "resource control"
filesystem, which is a "cgroup-like" interface.

Compared with cgroups, it has a similar process management lifecycle and
similar interfaces in a container. But unlike the cgroups hierarchy, it has a
single-level filesystem layout.

Intel RDT "resource control" filesystem hierarchy:
mount -t rscctrl rscctrl /sys/fs/rscctrl
tree /sys/fs/rscctrl
/sys/fs/rscctrl
|-- cpus
|-- info
|   |-- info
|   |-- l3
|       |-- domain_to_cache_id
|       |-- max_cbm_len
|       |-- max_closid
|-- schemas
|-- tasks
|-- <container_id>
    |-- cpus
    |-- schemas
    |-- tasks

The file `tasks` lists all task ids belonging to the partition "container_id".
Task ids can be added to or removed from this file to move tasks between
partitions. A task id belongs to only one partition at any time.

The file `schemas` has allocation bitmasks/values for L3 cache on each socket,
which contains L3 cache id and capacity bitmask (CBM).
	Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.

A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that
can be set cannot exceed the maximum CBM length. The maximum number of bits in
the CBM varies among supported Intel Xeon platforms. In the Intel RDT "resource
control" filesystem layout, the CBM in a "partition" should be a subset of the
CBM in root. The kernel checks whether the value is valid when it is written.
E.g., 0xfffff in root indicates that the maximum CBM length is 20 bits, which
map to the entire L3 cache capacity. Some valid CBM values to set in a
"partition" are 0xf, 0xf0, 0x3ff, 0x1f00, etc.

The file `cpus` has a cpu bitmask that specifies the CPUs that are bound to the
schemas. Any tasks scheduled on the cpus will use the schemas.

For more information about Intel RDT/CAT kernel interface:
https://lkml.org/lkml/2016/7/12/764

An example for runc:
There are two L3 caches in the two-socket machine, the default CBM is 0xfffff
and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache
id 0 and the whole L3 cache id 1 for the container:
"linux": {
	"resources": {
		"intelRdt": {
			"l3CacheSchema": "L3:0=ffff0;1=fffff",
			"L3CacheCpus":
			"00000000,00000000,00000000,00000000,00000000,00000000"
		}
	}
}

Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
  • Loading branch information
xiaochenshen committed Aug 11, 2016
1 parent 56ec051 commit 920de50
Show file tree
Hide file tree
Showing 12 changed files with 691 additions and 29 deletions.
58 changes: 53 additions & 5 deletions events.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/urfave/cli"
)

Expand All @@ -24,11 +25,12 @@ type event struct {

// stats is the runc specific stats structure for stability when encoding and decoding stats.
//
// Each field carries an explicit `json` tag so the encoded key names stay
// fixed regardless of how the Go field names evolve.
type stats struct {
	Cpu     cpu                `json:"cpu"`
	Memory  memory             `json:"memory"`
	Pids    pids               `json:"pids"`
	Blkio   blkio              `json:"blkio"`
	Hugetlb map[string]hugetlb `json:"hugetlb"`
	// IntelRdt holds the Intel RDT values filled in by convertLibcontainerStats.
	// The tag key must be spelled exactly `json`; any other key is ignored by
	// encoding/json and the field would be emitted under its Go name instead.
	IntelRdt intelRdt `json:"intelRdt"`
}

type hugetlb struct {
Expand Down Expand Up @@ -95,6 +97,25 @@ type memory struct {
Raw map[string]uint64 `json:"raw,omitempty"`
}

// intelRdtRoot is the runc-side JSON representation of the root-level Intel
// RDT values; convertIntelRdtRoot fills it from intelrdt.IntelRdtRootStats.
// Every field is tagged omitempty, so zero values ("" or 0) are left out of
// the encoded output.
// NOTE(review): the field names suggest they mirror the files under the root
// of the "resource control" filesystem (info, domain_to_cache_id,
// max_cbm_len, max_closid, schemas, cpus) — confirm against the intelrdt
// package.
type intelRdtRoot struct {
Info string `json:"info,omitempty"`
DomainToCacheId string `json:"domainToCacheId,omitempty"`
MaxCbmLen uint64 `json:"maxCbmLen,omitempty"`
MaxClosid uint64 `json:"maxClosid,omitempty"`
RootL3CacheSchema string `json:"rootL3CacheSchema,omitempty"`
RootL3CacheCpus string `json:"rootL3CacheCpus,omitempty"`
}

// intelRdtSub is the runc-side JSON representation of the per-container Intel
// RDT values; convertIntelRdtSub fills it from intelrdt.IntelRdtSubStats.
// Both fields are tagged omitempty, so empty strings are left out of the
// encoded output.
// NOTE(review): presumably these correspond to the `schemas` and `cpus` files
// of the container's partition — confirm against the intelrdt package.
type intelRdtSub struct {
L3CacheSchema string `json:"l3CacheSchema,omitempty"`
L3CacheCpus string `json:"l3CacheCpus,omitempty"`
}

// intelRdt groups the root-level and per-container Intel RDT stats under a
// single JSON object.
// NOTE(review): encoding/json never treats a struct value as empty, so the
// omitempty options below have no effect; both keys are always emitted (as
// "{}" when every inner field is omitted).
type intelRdt struct {
IntelRdtRoot intelRdtRoot `json:"intelRdtRoot,omitempty"`
IntelRdtSub intelRdtSub `json:"intelRdtSub,omitempty"`
}

var eventsCommand = cli.Command{
Name: "events",
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
Expand Down Expand Up @@ -224,6 +245,15 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
for k, v := range cg.HugetlbStats {
s.Hugetlb[k] = convertHugtlb(v)
}

is := ls.IntelRdtStats
if is == nil {
return nil
}

s.IntelRdt.IntelRdtRoot = convertIntelRdtRoot(is.IntelRdtRootStats)
s.IntelRdt.IntelRdtSub = convertIntelRdtSub(is.IntelRdtSubStats)

return &s
}

Expand Down Expand Up @@ -256,3 +286,21 @@ func convertBlkioEntry(c []cgroups.BlkioStatEntry) []blkioEntry {
}
return out
}

// convertIntelRdtRoot copies the root-level Intel RDT stats reported by
// libcontainer into runc's own intelRdtRoot structure, one field at a time,
// so that the encoded output does not depend on libcontainer's type.
func convertIntelRdtRoot(i intelrdt.IntelRdtRootStats) intelRdtRoot {
	var root intelRdtRoot

	// Values read from the info directory.
	root.Info = i.Info
	root.DomainToCacheId = i.DomainToCacheId
	root.MaxCbmLen = i.MaxCbmLen
	root.MaxClosid = i.MaxClosid

	// Root schema and CPU mask.
	root.RootL3CacheSchema = i.RootL3CacheSchema
	root.RootL3CacheCpus = i.RootL3CacheCpus

	return root
}

// convertIntelRdtSub copies the per-container Intel RDT stats reported by
// libcontainer into runc's own intelRdtSub structure.
func convertIntelRdtSub(i intelrdt.IntelRdtSubStats) intelRdtSub {
	var sub intelRdtSub
	sub.L3CacheSchema = i.L3CacheSchema
	sub.L3CacheCpus = i.L3CacheCpus
	return sub
}
7 changes: 7 additions & 0 deletions libcontainer/configs/cgroup_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,11 @@ type Resources struct {

// Set class identifier for container's network packets
NetClsClassid uint32 `json:"net_cls_classid"`

// Intel RDT: the schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
IntelRdtL3CacheSchema string `json:"intel_rdt_l3_cache_schema"`

// Intel RDT: the bitmask of the CPUs that are bound to the schema
IntelRdtL3CacheCpus string `json:"intel_rdt_l3_cache_cpus"`
}
51 changes: 40 additions & 11 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/syndtr/gocapability/capability"
Expand All @@ -35,6 +36,7 @@ type linuxContainer struct {
root string
config *configs.Config
cgroupManager cgroups.Manager
intelRdtManager intelrdt.Manager
initPath string
initArgs []string
initProcess parentProcess
Expand Down Expand Up @@ -62,6 +64,9 @@ type State struct {

// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
ExternalDescriptors []string `json:"external_descriptors,omitempty"`

// Intel RDT "resource control" filesystem path
IntelRdtPath string `json:"intel_rdt_path"`
}

// Container is a libcontainer container object.
Expand Down Expand Up @@ -156,6 +161,11 @@ func (c *linuxContainer) Stats() (*Stats, error) {
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
}
if c.intelRdtManager != nil {
if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
}
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
Expand All @@ -180,7 +190,15 @@ func (c *linuxContainer) Set(config configs.Config) error {
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
}
c.config = &config
return c.cgroupManager.Set(c.config)
if err := c.cgroupManager.Set(c.config); err != nil {
return err
}
if c.intelRdtManager != nil {
if err := c.intelRdtManager.Set(c.config); err != nil {
return err
}
}
return nil
}

func (c *linuxContainer) Start(process *Process) error {
Expand Down Expand Up @@ -346,16 +364,17 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
return nil, err
}
return &initProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
config: c.newInitConfig(p),
container: c,
process: p,
bootstrapData: data,
sharePidns: sharePidns,
rootDir: rootDir,
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
intelRdtManager: c.intelRdtManager,
config: c.newInitConfig(p),
container: c,
process: p,
bootstrapData: data,
sharePidns: sharePidns,
rootDir: rootDir,
}, nil
}

Expand All @@ -371,10 +390,15 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
if err != nil {
return nil, err
}
intelRdtPath := ""
if c.intelRdtManager != nil {
intelRdtPath = c.intelRdtManager.GetPath()
}
// TODO: set on container for process management
return &setnsProcess{
cmd: cmd,
cgroupPaths: c.cgroupManager.GetPaths(),
intelRdtPath: intelRdtPath,
childPipe: childPipe,
parentPipe: parentPipe,
config: c.newInitConfig(p),
Expand Down Expand Up @@ -1190,6 +1214,10 @@ func (c *linuxContainer) currentState() (*State, error) {
startTime, _ = c.initProcess.startTime()
externalDescriptors = c.initProcess.externalDescriptors()
}
IntelRdtPath := ""
if c.intelRdtManager != nil {
IntelRdtPath = c.intelRdtManager.GetPath()
}
state := &State{
BaseState: BaseState{
ID: c.ID(),
Expand All @@ -1201,6 +1229,7 @@ func (c *linuxContainer) currentState() (*State, error) {
CgroupPaths: c.cgroupManager.GetPaths(),
NamespacePaths: make(map[configs.NamespaceType]string),
ExternalDescriptors: externalDescriptors,
IntelRdtPath: IntelRdtPath,
}
if pid > 0 {
for _, ns := range c.config.Namespaces {
Expand Down
28 changes: 28 additions & 0 deletions libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/utils"
)

Expand Down Expand Up @@ -89,6 +90,21 @@ func Cgroupfs(l *LinuxFactory) error {
return nil
}

// IntelRdtFs is an options func to configure a LinuxFactory to return
// containers that use the Intel RDT "resource control" filesystem to
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
//
// The factory's NewIntelRdtManager is only installed when
// intelrdt.IntelRdtIsEnabled reports true; otherwise the factory is left
// untouched. IntelRdtFs always returns nil.
func IntelRdtFs(l *LinuxFactory) error {
	if !intelrdt.IntelRdtIsEnabled() {
		return nil
	}

	// Each container gets its own manager bound to its cgroup config and
	// its Intel RDT path.
	l.NewIntelRdtManager = func(cgroup *configs.Cgroup, rdtPath string) intelrdt.Manager {
		manager := &intelrdt.IntelRdtManager{
			Cgroups: cgroup,
			Path:    rdtPath,
		}
		return manager
	}
	return nil
}

// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
Expand Down Expand Up @@ -156,6 +172,9 @@ type LinuxFactory struct {

// NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager

// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
NewIntelRdtManager func(config *configs.Cgroup, path string) intelrdt.Manager
}

func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
Expand Down Expand Up @@ -208,6 +227,11 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
c.state = &stoppedState{c: c}
c.intelRdtManager = nil
if l.NewIntelRdtManager != nil {
c.intelRdtManager = l.NewIntelRdtManager(config.Cgroups, id)
}

return c, nil
}

Expand Down Expand Up @@ -241,6 +265,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
if err := c.refreshState(); err != nil {
return nil, err
}
c.intelRdtManager = nil
if l.NewIntelRdtManager != nil {
c.intelRdtManager = l.NewIntelRdtManager(state.Config.Cgroups, state.IntelRdtPath)
}
return c, nil
}

Expand Down
Loading

0 comments on commit 920de50

Please sign in to comment.