Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support fetching PSI stats for cgroupv2 containers - takeover #3679

Closed
wants to merge 13 commits into from
13 changes: 13 additions & 0 deletions events.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@ information is displayed once every 5 seconds.`,
},
}

// convertPSI copies one set of PSI values (the avg10/avg60/avg300 running
// averages and the cumulative total) from the libcontainer representation
// into the destination. Both pointers must be non-nil.
func convertPSI(from *cgroups.PSIData, to *cgroups.PSIData) {
	// PSIData consists solely of Avg10, Avg60, Avg300 and Total, so a
	// whole-struct assignment transfers exactly those four fields.
	*to = *from
}

func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
cg := ls.CgroupStats
if cg == nil {
Expand All @@ -129,6 +136,8 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
convertPSI(&cg.CpuStats.PSI.Some, &s.CPU.PSI.Some)
convertPSI(&cg.CpuStats.PSI.Full, &s.CPU.PSI.Full)

s.CPUSet = types.CPUSet(cg.CPUSetStats)

Expand All @@ -138,6 +147,8 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.Memory.Swap = convertMemoryEntry(cg.MemoryStats.SwapUsage)
s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage)
s.Memory.Raw = cg.MemoryStats.Stats
convertPSI(&cg.MemoryStats.PSI.Some, &s.Memory.PSI.Some)
convertPSI(&cg.MemoryStats.PSI.Full, &s.Memory.PSI.Full)

s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive)
s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive)
Expand All @@ -147,6 +158,8 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.Blkio.IoMergedRecursive = convertBlkioEntry(cg.BlkioStats.IoMergedRecursive)
s.Blkio.IoTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoTimeRecursive)
s.Blkio.SectorsRecursive = convertBlkioEntry(cg.BlkioStats.SectorsRecursive)
convertPSI(&cg.BlkioStats.PSI.Some, &s.Blkio.PSI.Some)
convertPSI(&cg.BlkioStats.PSI.Full, &s.Blkio.PSI.Full)

s.Hugetlb = make(map[string]types.Hugetlb)
for k, v := range cg.HugetlbStats {
Expand Down
14 changes: 14 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)

// parseError is a package-local alias for fscommon.ParseError, so code in
// this package can construct it without the package qualifier.
type parseError = fscommon.ParseError
Expand Down Expand Up @@ -114,6 +115,19 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// psi (since kernel 4.20)
// open *.pressure file returns
// - ErrNotExist when kernel < 4.20 or CONFIG_PSI is disabled
// - ENOTSUP when PSI is built into the kernel but not enabled (it requires psi=1 on the kernel command line)
if err := statPSI(m.dirPath, "cpu.pressure", st); err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENOTSUP) {
errs = append(errs, err)
}
if err := statPSI(m.dirPath, "memory.pressure", st); err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENOTSUP) {
errs = append(errs, err)
}
if err := statPSI(m.dirPath, "io.pressure", st); err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENOTSUP) {
errs = append(errs, err)
}
// hugetlb (since kernel 5.6)
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
Expand Down
99 changes: 99 additions & 0 deletions libcontainer/cgroups/fs2/psi.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package fs2

import (
"bufio"
"fmt"
"os"
"strconv"
"strings"

"github.com/opencontainers/runc/libcontainer/cgroups"
)

// statPSI reads the PSI (pressure stall information) file named file from the
// cgroup directory dirPath and stores the parsed "some" and "full" lines into
// the matching controller section of stats.
//
// file must be one of "cpu.pressure", "memory.pressure" or "io.pressure"; any
// other name is rejected with an error before the file is opened. Errors from
// opening the file are returned unchanged so callers can test them with
// errors.Is. Malformed content and read errors are reported as a parseError
// carrying dirPath and file. Blank lines and lines that start with an
// unrecognized keyword are skipped.
func statPSI(dirPath string, file string, stats *cgroups.Stats) error {
	if stats == nil {
		return fmt.Errorf("invalid Stats pointer is nil")
	}

	// Resolve the destination first: an unsupported file name must produce an
	// error rather than a nil pointer dereference while parsing.
	var psistats *cgroups.PSIStats
	switch file {
	case "cpu.pressure":
		psistats = &stats.CpuStats.PSI
	case "memory.pressure":
		psistats = &stats.MemoryStats.PSI
	case "io.pressure":
		psistats = &stats.BlkioStats.PSI
	default:
		return fmt.Errorf("unsupported PSI file: %q", file)
	}

	f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
	if err != nil {
		return err
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		parts := strings.Fields(sc.Text())
		if len(parts) == 0 {
			// strings.Fields yields an empty slice for a blank line;
			// indexing parts[0] below would panic.
			continue
		}

		var dst *cgroups.PSIData
		switch parts[0] {
		case "some":
			dst = &psistats.Some
		case "full":
			dst = &psistats.Full
		default:
			// Unknown line type: ignore it.
			continue
		}

		data, err := parsePSIData(parts[1:])
		if err != nil {
			return &parseError{Path: dirPath, File: file, Err: err}
		}
		*dst = *data
	}
	if err := sc.Err(); err != nil {
		return &parseError{Path: dirPath, File: file, Err: err}
	}
	return nil
}

// setFloat parses s as a 64-bit floating point number and stores the result
// in *f.
//
// It returns an error if f is nil or if s cannot be parsed. On a parse
// failure *f is left untouched and the strconv error is wrapped, so callers
// can inspect the cause with errors.Is / errors.As.
func setFloat(s string, f *float64) error {
	if f == nil {
		return fmt.Errorf("invalid pointer *float64 is nil")
	}
	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		// The strconv error already quotes the offending input.
		return fmt.Errorf("invalid PSI value: %w", err)
	}
	*f = v

	return nil
}

// parsePSIData converts the key=value fields of a single PSI line (everything
// after the leading "some"/"full" keyword) into a PSIData value.
//
// Recognized keys are avg10, avg60 and avg300 (parsed as floats) and total
// (parsed as an unsigned 64-bit integer); unrecognized keys are ignored.
// A field without "=" or with an unparsable value yields a nil result and an
// error that names the offending field and wraps the underlying cause.
func parsePSIData(psi []string) (*cgroups.PSIData, error) {
	data := cgroups.PSIData{}
	for _, f := range psi {
		kv := strings.SplitN(f, "=", 2)
		if len(kv) != 2 {
			return nil, fmt.Errorf("invalid psi data: %q", f)
		}

		var err error
		switch kv[0] {
		case "avg10":
			err = setFloat(kv[1], &data.Avg10)
		case "avg60":
			err = setFloat(kv[1], &data.Avg60)
		case "avg300":
			err = setFloat(kv[1], &data.Avg300)
		case "total":
			var v uint64
			if v, err = strconv.ParseUint(kv[1], 10, 64); err == nil {
				data.Total = v
			}
		}
		if err != nil {
			// One error path for every key: report the whole field and
			// keep the cause reachable through errors.Is / errors.As.
			return nil, fmt.Errorf("invalid psi data: %q: %w", f, err)
		}
	}
	return &data, nil
}
47 changes: 47 additions & 0 deletions libcontainer/cgroups/fs2/psi_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package fs2

import (
"os"
"path/filepath"
"reflect"
"testing"

"github.com/opencontainers/runc/libcontainer/cgroups"
)

// examplePSIData is a sample pressure-file body with one "some" line and one
// "full" line; TestStatCPUPSI writes it to a fake cpu.pressure file.
const examplePSIData = `some avg10=1.71 avg60=2.36 avg300=2.57 total=230548833
full avg10=1.00 avg60=1.01 avg300=1.00 total=157622356`

// TestStatCPUPSI writes a sample cpu.pressure file into a temporary directory
// and verifies that statPSI fills stats.CpuStats.PSI with the parsed values.
func TestStatCPUPSI(t *testing.T) {
	// The "cgroup" here is a plain temporary directory, not a real cgroupfs.
	cgroups.TestMode = true

	const psiFile = "cpu.pressure"

	dir := t.TempDir()
	if err := os.WriteFile(filepath.Join(dir, psiFile), []byte(examplePSIData), 0o644); err != nil {
		t.Fatal(err)
	}

	var stats cgroups.Stats
	if err := statPSI(dir, psiFile, &stats); err != nil {
		t.Error(err)
	}

	want := cgroups.PSIStats{
		Some: cgroups.PSIData{
			Avg10:  1.71,
			Avg60:  2.36,
			Avg300: 2.57,
			Total:  230548833,
		},
		Full: cgroups.PSIData{
			Avg10:  1.00,
			Avg60:  1.01,
			Avg300: 1.00,
			Total:  157622356,
		},
	}
	if got := stats.CpuStats.PSI; !reflect.DeepEqual(got, want) {
		t.Errorf("unexpected PSI result: %+v", got)
	}
}
15 changes: 15 additions & 0 deletions libcontainer/cgroups/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,22 @@ type CpuUsage struct {
UsageInUsermode uint64 `json:"usage_in_usermode"`
}

// PSIData holds the values of a single line of a PSI (pressure stall
// information) file: the avg10, avg60 and avg300 running averages and the
// cumulative total.
// NOTE(review): per the kernel PSI documentation the averages are percentages
// and Total is in microseconds — confirm before relying on the units.
type PSIData struct {
Avg10 float64 `json:"avg10"`
Avg60 float64 `json:"avg60"`
Avg300 float64 `json:"avg300"`
Total uint64 `json:"total"`
}

// PSIStats groups the "some" and "full" lines of one PSI file.
//
// Note: encoding/json never treats a struct value as empty, so the omitempty
// options below have no effect and both fields are always emitted.
type PSIStats struct {
Some PSIData `json:"some,omitempty"`
Full PSIData `json:"full,omitempty"`
}

// CpuStats bundles the CPU statistics of a cgroup: usage counters,
// throttling data and PSI (pressure stall information).
type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
PSI PSIStats `json:"psi,omitempty"`
}

type CPUSetStats struct {
Expand Down Expand Up @@ -89,6 +102,7 @@ type MemoryStats struct {
UseHierarchy bool `json:"use_hierarchy"`

Stats map[string]uint64 `json:"stats,omitempty"`
PSI PSIStats `json:"psi,omitempty"`
}

type PageUsageByNUMA struct {
Expand Down Expand Up @@ -133,6 +147,7 @@ type BlkioStats struct {
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
PSI PSIStats `json:"psi,omitempty"`
}

type HugetlbStats struct {
Expand Down
26 changes: 26 additions & 0 deletions tests/integration/events.bats
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,32 @@ function teardown() {
[[ "${lines[0]}" == *"data"* ]]
}

# shellcheck disable=SC2030
@test "events --stats with psi data" {
	requires root cgroups_v2 psi
	init_cgroup_paths

	update_config '.linux.resources.cpu |= { "quota": 1000 }'

	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
	[ "$status" -eq 0 ]

	# Stress the CPU a little bit so that some pressure gets recorded.
	runc exec test_busybox dd if=/dev/zero bs=1M count=5 of=/dev/null
	[ "$status" -eq 0 ]

	# Fetch stats to see PSI metrics.
	runc events --stats test_busybox
	[ "$status" -eq 0 ]

	# jq prints "null" (never an empty string) for a missing key, so comparing
	# its output against "" can never fail. Use "jq -e" instead: it exits
	# non-zero when the result is false or null, which fails the test.
	for psi_type in some full; do
		for psi_metric in avg10 avg60 avg300 total; do
			echo "${lines[0]}" | jq -e ".data.cpu.psi.$psi_type.$psi_metric != null"
		done
	done

	# Total must be more than 0 (a missing key yields null, which is not > 0).
	echo "${lines[0]}" | jq -e '.data.cpu.psi.some.total > 0'
}

function test_events() {
# XXX: currently cgroups require root containers.
requires root
Expand Down
7 changes: 7 additions & 0 deletions tests/integration/helpers.bash
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,13 @@ function requires() {
skip_me=1
fi
;;
psi)
# If PSI is not compiled in the kernel, the file will not exist.
# If PSI is compiled in but not enabled, reading the file fails with ENOTSUP.
if ! cat /sys/fs/cgroup/cpu.pressure &>/dev/null; then
skip_me=1
fi
;;
*)
fail "BUG: Invalid requires $var."
;;
Expand Down
12 changes: 11 additions & 1 deletion types/events.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package types

import "github.com/opencontainers/runc/libcontainer/intelrdt"
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/intelrdt"
)

// Event struct for encoding the event data to json.
type Event struct {
Expand All @@ -21,6 +24,10 @@ type Stats struct {
NetworkInterfaces []*NetworkInterface `json:"network_interfaces"`
}

// PSIData is the events-API counterpart of cgroups.PSIData: a distinct named
// type with the same underlying struct (fields and JSON tags included).
type PSIData cgroups.PSIData

// PSIStats is the events-API counterpart of cgroups.PSIStats: a distinct named
// type with the same underlying struct (fields and JSON tags included).
type PSIStats cgroups.PSIStats

type Hugetlb struct {
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Expand All @@ -43,6 +50,7 @@ type Blkio struct {
IoMergedRecursive []BlkioEntry `json:"ioMergedRecursive,omitempty"`
IoTimeRecursive []BlkioEntry `json:"ioTimeRecursive,omitempty"`
SectorsRecursive []BlkioEntry `json:"sectorsRecursive,omitempty"`
PSI PSIStats `json:"psi,omitempty"`
}

type Pids struct {
Expand All @@ -69,6 +77,7 @@ type CpuUsage struct {
// Cpu is the CPU section of the stats event payload: usage counters,
// throttling data and PSI (pressure stall information).
type Cpu struct {
Usage CpuUsage `json:"usage,omitempty"`
Throttling Throttling `json:"throttling,omitempty"`
PSI PSIStats `json:"psi,omitempty"`
}

type CPUSet struct {
Expand Down Expand Up @@ -99,6 +108,7 @@ type Memory struct {
Kernel MemoryEntry `json:"kernel,omitempty"`
KernelTCP MemoryEntry `json:"kernelTCP,omitempty"`
Raw map[string]uint64 `json:"raw,omitempty"`
PSI PSIStats `json:"psi,omitempty"`
}

type L3CacheInfo struct {
Expand Down