Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Query cgroup memory data and fuse with /proc data for more reliable information. #50

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
288 changes: 288 additions & 0 deletions sigar_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package sigar
import (
"bufio"
"bytes"
"errors"
"io"
"io/ioutil"
"os"
Expand All @@ -19,11 +20,47 @@ var system struct {
}

var Procd string
var Sysd1 string
var Sysd2 string

// Files in system directories used here
// - Procd
// - /stat
// - /meminfo
// - /self/cgroup | 'grep :memory:' | split ':' | last => cgroup
// - /self/cgroup | 'grep ::' | split ':' | last => cgroup/fallback
// - /self/mounts
// - Sysd1 (cgroup v1)
// - memory/<cgroup>/memory.limit_in_bytes
// - memory/<cgroup>/memory.stat
// - Sysd2 (cgroup v2)
// - <cgroup>/memory.high
// - <cgroup>/memory.current
// - <cgroup>/memory.swap.current
//
// While Procd is fixed to `/proc`, the `Sysd*` directories are
// dynamic: although there are semi-standard mount points for the
// cgroup controllers, these are only a convention, and the
// controllers can be mounted anywhere. The file `/proc/self/mounts`
// contains the information we need to locate them.

func init() {
system.ticks = 100 // C.sysconf(C._SC_CLK_TCK)

Procd = "/proc"
Sysd1 = ""
Sysd2 = ""

determineControllerMounts(&Sysd1, &Sysd2)

// Fallbacks for cgroup controller mount points if nothing was
// found in /proc/self/mounts
if Sysd1 == "" {
Sysd1 = "/sys/fs/cgroup/memory"
}
if Sysd2 == "" {
Sysd2 = "/sys/fs/cgroup/unified"
}

// grab system boot time
readFile(Procd+"/stat", func(line string) bool {
Expand Down Expand Up @@ -86,6 +123,70 @@ func (self *Mem) Get() error {
self.Used = self.Total - self.Free
self.ActualUsed = self.Total - self.ActualFree

// Instead of detecting if this code is run within a container
// or not (*), we simply attempt to retrieve the cgroup
// information about memory limits and usage and if present
// incorporate them into the results.
//
// 0. If we are unable to determine the Cgroup for the process
// we ignore it and stay with the host data.
//
// 1. If the cgroup limit is not available we ignore it and
// stay with the host data.
//
// 2. Note that we are taking the smaller of host total and
// cgroup limit, as the safer value for the total. The
// reason here is that there are Linux systems which report
// something like 8 EiB (Exa!) (**) as the cgroup limit, on
// systems which have only 64 GiB (Giga) of physical RAM.
//
// (*) There does not seem to be a truly reliable and portable
// means of detecting execution inside a container vs
// outside that works across all the platforms (macOS, Linux,
// Windows) and container runtimes (Docker, LXC, OCI, ...).
//
// (**) The exact value actually is 2^63 - 4096, i.e.
// 8 EiB - 4 KiB. This is, as far as is known, the
// maximum limit of the Linux virtual memory system.

var cgroup string
if err := determineSelfCgroup(&cgroup); err != nil {
// Unable to determine process' Cgroup
return nil
}

cgroupLimit, err := determineMemoryLimit(cgroup)
// (x) If the limit is not available or bogus we keep the host data as limit.

if err == nil && cgroupLimit < self.Total {
// See (2) above why only a cgroup limit less than the
// host total is accepted as the new total available
// memory in the cgroup.
self.Total = cgroupLimit
}

rss, err := determineMemoryUsage(cgroup)

if err != nil {
return nil
}

swap, err := determineSwapUsage(cgroup)
if err != nil {
// Swap information is optional. I.e. the kernel may
// have swap accounting disabled. Because of this any
// kind of trouble determining the swap usage is
// mapped to `no swap used`. This allows us to limp
// on with some inaccuracies, instead of aborting.
swap = 0
}

self.Used = rss + swap
self.Free = self.Total - self.Used

self.ActualUsed = self.Used
self.ActualFree = self.Free

return nil
}

Expand Down Expand Up @@ -316,6 +417,119 @@ func (self *ProcExe) Get(pid int) error {
return nil
}

// determineSwapUsage reports the swap usage recorded for the given
// control group.
//
// The cgroup v2 file `<Sysd2><cgroup>/memory.swap.current` is tried
// first; if it can be read, its first line is parsed and returned.
// Otherwise the `swap` entry of the cgroup v1 `memory.stat` file below
// `<Sysd1><cgroup>` is used.
//
// An error is returned when the v1 file cannot be read or holds no
// parsable `swap` entry ("no data found"). The function itself never
// maps a failure to zero; that decision is left to the caller.
func determineSwapUsage(cgroup string) (uint64, error) {
	// cgroup v2 takes precedence over v1.
	if raw, readErr := ioutil.ReadFile(Sysd2 + cgroup + "/memory.swap.current"); readErr == nil {
		return strtoull(strings.SplitN(string(raw), "\n", 2)[0])
	}

	// cgroup v1: pick the value out of memory.stat.
	var swapUsed uint64
	statErr, seen := parseCgroupMeminfo(Sysd1+cgroup, map[string]*uint64{
		"swap": &swapUsed,
	})
	switch {
	case statErr != nil:
		return 0, statErr
	case !seen:
		return 0, errors.New("no data found")
	}
	return swapUsed, nil
}

// determineMemoryUsage reports the memory usage recorded for the
// given control group.
//
// The cgroup v2 file `<Sysd2><cgroup>/memory.current` is tried first;
// if it can be read, its first line is parsed and returned. Otherwise
// the `total_rss` entry of the cgroup v1 `memory.stat` file below
// `<Sysd1><cgroup>` is used.
//
// An error is returned when the v1 file cannot be read or holds no
// parsable `total_rss` entry ("no data found").
func determineMemoryUsage(cgroup string) (uint64, error) {
	// cgroup v2 takes precedence over v1.
	raw, readErr := ioutil.ReadFile(Sysd2 + cgroup + "/memory.current")
	if readErr == nil {
		lines := strings.Split(string(raw), "\n")
		return strtoull(lines[0])
	}

	// cgroup v1: pick the value out of memory.stat.
	var rssUsed uint64
	statErr, seen := parseCgroupMeminfo(Sysd1+cgroup, map[string]*uint64{
		"total_rss": &rssUsed,
	})
	if statErr != nil {
		return 0, statErr
	}
	if !seen {
		return 0, errors.New("no data found")
	}
	return rssUsed, nil
}

// determineMemoryLimit reports the memory limit configured for the
// given control group.
//
// The cgroup v2 file `<Sysd2><cgroup>/memory.high` is tried first. If
// it can be read and its first line is the literal `max`, the error
// "no limit" is returned; any other first line is parsed as a number.
// If the v2 file cannot be read, the first line of the cgroup v1 file
// `<Sysd1><cgroup>/memory.limit_in_bytes` is parsed instead, and the
// read error of that file is returned when it is unavailable too.
//
// NOTE(review): only `memory.high` is consulted for v2; confirm
// whether `memory.max` should be taken into account as well.
func determineMemoryLimit(cgroup string) (uint64, error) {
	// cgroup v2 takes precedence over v1.
	if raw, v2Err := ioutil.ReadFile(Sysd2 + cgroup + "/memory.high"); v2Err == nil {
		firstLine := strings.SplitN(string(raw), "\n", 2)[0]
		if firstLine != "max" {
			return strtoull(firstLine)
		}
		// `max` means unlimited; signal this as an error.
		return 0, errors.New("no limit")
	}

	raw, v1Err := ioutil.ReadFile(Sysd1 + cgroup + "/memory.limit_in_bytes")
	if v1Err != nil {
		return 0, v1Err
	}
	return strtoull(strings.SplitN(string(raw), "\n", 2)[0])
}

// determineSelfCgroup stores the memory control group path of the
// current process in *cgroup.
//
// It reads `<Procd>/self/cgroup`, whose lines are expected to have the
// form `id:controllers:path`. Lines with fewer than three fields are
// skipped. A line whose comma-separated controller list contains
// `memory` (cgroup v1) is preferred; if there is none, the `0::path`
// line (cgroup v2) is used. When several lines match the same rule the
// last one wins.
//
// The file is scanned once. It returns the error from reading the
// file, or "unable to determine control group" when no usable line was
// found. *cgroup is only written on success.
func determineSelfCgroup(cgroup *string) error {
	var v1Path, v2Path string

	err := readFile(Procd+"/self/cgroup", func(line string) bool {
		// Split into at most three fields: the path is the remainder
		// of the line and may itself contain `:`.
		fields := strings.SplitN(line, ":", 3)
		if len(fields) < 3 {
			return true
		}
		path := strings.Trim(fields[2], " ")

		// Match: `*:memory:/path`, including co-mounted controller
		// lists such as `*:cpu,memory:/path`.
		for _, controller := range strings.Split(fields[1], ",") {
			if controller == "memory" {
				v1Path = path
				return true
			}
		}

		// Match: `0::/path`
		if fields[0] == "0" && fields[1] == "" {
			v2Path = path
		}
		return true
	})
	if err != nil {
		return err
	}

	// A cgroup v1 memory controller takes precedence over v2.
	switch {
	case v1Path != "":
		*cgroup = v1Path
	case v2Path != "":
		*cgroup = v2Path
	default:
		return errors.New("unable to determine control group")
	}
	return nil
}

func parseMeminfo(table map[string]*uint64) error {
return readFile(Procd+"/meminfo", func(line string) bool {
fields := strings.Split(line, ":")
Expand All @@ -332,6 +546,27 @@ func parseMeminfo(table map[string]*uint64) error {
})
}

// parseCgroupMeminfo reads `<cgroupDir>/memory.stat` and copies the
// values of the entries named in table into the variables the table
// points at.
//
// Each line is expected to look like `key value`, separated by a
// space. Lines without a value, lines whose key is not in table (or is
// mapped to nil), and lines whose value does not parse as a number are
// skipped instead of causing a panic.
//
// It returns the error from reading the file (with found == false),
// otherwise nil together with a flag telling whether at least one
// requested entry was stored. Note the unusual (error, bool) result
// order, kept for compatibility with existing callers.
func parseCgroupMeminfo(cgroupDir string, table map[string]*uint64) (error, bool) {
	var found bool
	err := readFile(cgroupDir+"/memory.stat", func(line string) bool {
		// Key is everything before the first space; the remainder
		// holds the value.
		fields := strings.SplitN(line, " ", 2)
		if len(fields) < 2 {
			return true
		}
		ptr := table[fields[0]]
		if ptr == nil {
			return true
		}
		// Fields tolerates extra whitespace around the value and
		// yields an empty slice when the value is missing.
		value := strings.Fields(fields[1])
		if len(value) == 0 {
			return true
		}
		if val, err := strtoull(value[0]); err == nil {
			*ptr = val
			found = true
		}
		return true
	})
	if err != nil {
		return err, false
	}
	return nil, found
}

func parseCpuStat(self *Cpu, line string) error {
fields := strings.Fields(line)

Expand Down Expand Up @@ -390,3 +625,56 @@ func readProcFile(pid int, name string) ([]byte, error) {

return contents, err
}

// determineControllerMounts scans `<Procd>/self/mounts` for the mount
// points of the cgroup memory controllers and stores them in *sysd1
// (cgroup v1) and *sysd2 (cgroup v2).
//
// Entries have the form `device path type options ...`, with the
// elements separated by single spaces; lines with fewer than four
// elements are skipped.
//
//   - v2: `path` of an entry with `type == "cgroup2"`.
//   - v1: `path` of an entry with `type == "cgroup"` whose
//     comma-separated options contain `memory`.
//
// The `device` column can be anything and is therefore not used to
// narrow down the set of entries.
//
// A target that is already non-empty is left untouched, so the first
// matching entry wins. Additional matching mounts are ignored rather
// than treated as fatal, so that merely importing the package cannot
// crash the program. Targets for which nothing is found keep their
// previous value.
func determineControllerMounts(sysd1, sysd2 *string) {
	// The read error is deliberately dropped: this is a best-effort
	// lookup and leaves the targets unchanged when the file is not
	// available.
	_ = readFile(Procd+"/self/mounts", func(line string) bool {
		fields := strings.Split(line, " ")
		if len(fields) < 4 {
			return true
		}

		mpath, mtype, moptions := fields[1], fields[2], fields[3]

		switch mtype {
		case "cgroup2":
			if *sysd2 == "" {
				*sysd2 = mpath
			}
		case "cgroup":
			if *sysd1 == "" && stringSliceContains(strings.Split(moptions, ","), "memory") {
				*sysd1 = mpath
			}
		}
		return true
	})
}

// stringSliceContains reports whether x is equal to at least one
// element of a. A nil or empty slice never contains anything.
func stringSliceContains(a []string, x string) bool {
	for i := 0; i < len(a); i++ {
		if a[i] == x {
			return true
		}
	}
	return false
}
Loading