Adding /proc/<pid>/schedstat (google#1872)

Add scheduler metrics read from /proc/<pid>/schedstat
mannychang · Mar 8, 2018 · 08f0c23 · 08f0c23
1 parent b817801
commit 08f0c23
Show file tree

Hide file tree

Showing 7 changed files with 130 additions and 20 deletions.
diff --git a/cadvisor.go b/cadvisor.go
@@ -64,6 +64,7 @@ var (
 	ignoreMetrics metricSetValue = metricSetValue{container.MetricSet{
 		container.NetworkTcpUsageMetrics: struct{}{},
 		container.NetworkUdpUsageMetrics: struct{}{},
+		container.ProcessSchedulerMetrics: struct{}{},
 	}}
 
 	// List of metrics that can be ignored.
@@ -73,6 +74,7 @@ var (
 		container.NetworkTcpUsageMetrics: struct{}{},
 		container.NetworkUdpUsageMetrics: struct{}{},
 		container.PerCpuUsageMetrics:     struct{}{},
+		container.ProcessSchedulerMetrics: struct{}{},
 	}
 )
 

diff --git a/container/factory.go b/container/factory.go
@@ -41,16 +41,17 @@ type ContainerHandlerFactory interface {
 type MetricKind string
 
 const (
-	CpuUsageMetrics        MetricKind = "cpu"
-	PerCpuUsageMetrics     MetricKind = "percpu"
-	MemoryUsageMetrics     MetricKind = "memory"
-	CpuLoadMetrics         MetricKind = "cpuLoad"
-	DiskIOMetrics          MetricKind = "diskIO"
-	DiskUsageMetrics       MetricKind = "disk"
-	NetworkUsageMetrics    MetricKind = "network"
-	NetworkTcpUsageMetrics MetricKind = "tcp"
-	NetworkUdpUsageMetrics MetricKind = "udp"
-	AppMetrics             MetricKind = "app"
+	CpuUsageMetrics         MetricKind = "cpu"
+	ProcessSchedulerMetrics MetricKind = "sched"
+	PerCpuUsageMetrics      MetricKind = "percpu"
+	MemoryUsageMetrics      MetricKind = "memory"
+	CpuLoadMetrics          MetricKind = "cpuLoad"
+	DiskIOMetrics           MetricKind = "diskIO"
+	DiskUsageMetrics        MetricKind = "disk"
+	NetworkUsageMetrics     MetricKind = "network"
+	NetworkTcpUsageMetrics  MetricKind = "tcp"
+	NetworkUdpUsageMetrics  MetricKind = "udp"
+	AppMetrics              MetricKind = "app"
 )
 
 func (mk MetricKind) String() string {

diff --git a/container/libcontainer/handler.go b/container/libcontainer/handler.go
@@ -28,6 +28,7 @@ import (
 	"github.com/google/cadvisor/container"
 	info "github.com/google/cadvisor/info/v1"
 
+	"bytes"
 	"github.com/golang/glog"
 	"github.com/opencontainers/runc/libcontainer"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -39,18 +40,20 @@ import (
 import "C"
 
 type Handler struct {
-	cgroupManager cgroups.Manager
-	rootFs        string
-	pid           int
-	ignoreMetrics container.MetricSet
+	cgroupManager   cgroups.Manager
+	rootFs          string
+	pid             int
+	ignoreMetrics   container.MetricSet
+	pidMetricsCache map[int]*info.CpuSchedstat
 }
 
 func NewHandler(cgroupManager cgroups.Manager, rootFs string, pid int, ignoreMetrics container.MetricSet) *Handler {
 	return &Handler{
-		cgroupManager: cgroupManager,
-		rootFs:        rootFs,
-		pid:           pid,
-		ignoreMetrics: ignoreMetrics,
+		cgroupManager:   cgroupManager,
+		rootFs:          rootFs,
+		pid:             pid,
+		ignoreMetrics:   ignoreMetrics,
+		pidMetricsCache: make(map[int]*info.CpuSchedstat),
 	}
 }
 
@@ -66,6 +69,18 @@ func (h *Handler) GetStats() (*info.ContainerStats, error) {
 	withPerCPU := !h.ignoreMetrics.Has(container.PerCpuUsageMetrics)
 	stats := newContainerStats(libcontainerStats, withPerCPU)
 
+	if !h.ignoreMetrics.Has(container.ProcessSchedulerMetrics) {
+		pids, err := h.cgroupManager.GetAllPids()
+		if err != nil {
+			glog.V(4).Infof("Could not get PIDs for container %d: %v", h.pid, err)
+		} else {
+			stats.Cpu.Schedstat, err = schedulerStatsFromProcs(h.rootFs, pids, h.pidMetricsCache)
+			if err != nil {
+				glog.V(4).Infof("Unable to get Process Scheduler Stats: %v", err)
+			}
+		}
+	}
+
 	// If we know the pid then get network stats from /proc/<pid>/net/dev
 	if h.pid == 0 {
 		return stats, nil
@@ -117,6 +132,50 @@ func (h *Handler) GetStats() (*info.ContainerStats, error) {
 	return stats, nil
 }
 
+func schedulerStatsFromProcs(rootFs string, pids []int, pidMetricsCache map[int]*info.CpuSchedstat) (info.CpuSchedstat, error) {
+	for _, pid := range pids {
+		f, err := os.Open(path.Join(rootFs, "proc", strconv.Itoa(pid), "schedstat"))
+		if err != nil {
+			return info.CpuSchedstat{}, fmt.Errorf("couldn't open scheduler statistics for process %d: %v", pid, err)
+		}
+		defer f.Close()
+		contents, err := ioutil.ReadAll(f)
+		if err != nil {
+			return info.CpuSchedstat{}, fmt.Errorf("couldn't read scheduler statistics for process %d: %v", pid, err)
+		}
+		rawMetrics := bytes.Split(bytes.TrimRight(contents, "\n"), []byte(" "))
+		if len(rawMetrics) != 3 {
+			return info.CpuSchedstat{}, fmt.Errorf("unexpected number of metrics in schedstat file for process %d", pid)
+		}
+		cacheEntry, ok := pidMetricsCache[pid]
+		if !ok {
+			cacheEntry = &info.CpuSchedstat{}
+			pidMetricsCache[pid] = cacheEntry
+		}
+		for i, rawMetric := range rawMetrics {
+			metric, err := strconv.ParseUint(string(rawMetric), 10, 64)
+			if err != nil {
+				return info.CpuSchedstat{}, fmt.Errorf("parsing error while reading scheduler statistics for process: %d: %v", pid, err)
+			}
+			switch i {
+			case 0:
+				cacheEntry.RunTime = metric
+			case 1:
+				cacheEntry.RunqueueTime = metric
+			case 2:
+				cacheEntry.RunPeriods = metric
+			}
+		}
+	}
+	schedstats := info.CpuSchedstat{}
+	for _, v := range pidMetricsCache {
+		schedstats.RunPeriods += v.RunPeriods
+		schedstats.RunqueueTime += v.RunqueueTime
+		schedstats.RunTime += v.RunTime
+	}
+	return schedstats, nil
+}
+
 func networkStatsFromProc(rootFs string, pid int) ([]info.InterfaceStats, error) {
 	netStatsFile := path.Join(rootFs, "proc", strconv.Itoa(pid), "/net/dev")
 

diff --git a/info/v1/container.go b/info/v1/container.go
@@ -293,10 +293,23 @@ type CpuCFS struct {
 	ThrottledTime uint64 `json:"throttled_time"`
 }
 
+// CpuSchedstat holds scheduler statistics aggregated over a container's processes.
+type CpuSchedstat struct {
+	// https://www.kernel.org/doc/Documentation/scheduler/sched-stats.txt
+
+	// Time spent on the cpu, in nanoseconds.
+	RunTime uint64 `json:"run_time"`
+	// Time spent waiting on a runqueue, in nanoseconds.
+	RunqueueTime uint64 `json:"runqueue_time"`
+	// Number of timeslices run on the cpu.
+	RunPeriods uint64 `json:"run_periods"`
+}
+
 // All CPU usage metrics are cumulative from the creation of the container
 type CpuStats struct {
-	Usage CpuUsage `json:"usage"`
-	CFS   CpuCFS   `json:"cfs"`
+	Usage     CpuUsage     `json:"usage"`
+	CFS       CpuCFS       `json:"cfs"`
+	Schedstat CpuSchedstat `json:"schedstat"`
 	// Smoothed average of number of runnable threads x 1000.
 	// We multiply by thousand to avoid using floats, but preserving precision.
 	// Load is smoothed over the last 10 seconds. Instantaneous value can be read

diff --git a/metrics/prometheus.go b/metrics/prometheus.go
@@ -197,6 +197,27 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
 				getValues: func(s *info.ContainerStats) metricValues {
 					return metricValues{{value: float64(s.Cpu.CFS.ThrottledTime) / float64(time.Second)}}
 				},
+			}, {
+				name:      "container_cpu_schedstat_run_seconds_total",
+				help:      "Time duration the processes of the container have run on the CPU.",
+				valueType: prometheus.CounterValue,
+				getValues: func(s *info.ContainerStats) metricValues {
+					return metricValues{{value: float64(s.Cpu.Schedstat.RunTime) / float64(time.Second)}}
+				},
+			}, {
+				name:      "container_cpu_schedstat_runqueue_seconds_total",
+				help:      "Time duration processes of the container have been waiting on a runqueue.",
+				valueType: prometheus.CounterValue,
+				getValues: func(s *info.ContainerStats) metricValues {
+					return metricValues{{value: float64(s.Cpu.Schedstat.RunqueueTime) / float64(time.Second)}}
+				},
+			}, {
+				name:      "container_cpu_schedstat_run_periods_total",
+				help:      "Number of times processes of the cgroup have run on the cpu",
+				valueType: prometheus.CounterValue,
+				getValues: func(s *info.ContainerStats) metricValues {
+					return metricValues{{value: float64(s.Cpu.Schedstat.RunPeriods)}}
+				},
 			}, {
 				name:      "container_cpu_load_average_10s",
 				help:      "Value of container cpu load average over the last 10 seconds.",

diff --git a/metrics/prometheus_test.go b/metrics/prometheus_test.go
@@ -90,6 +90,11 @@ func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.Container
 							ThrottledPeriods: 18,
 							ThrottledTime:    1724314000,
 						},
+						Schedstat: info.CpuSchedstat{
+							RunTime:      53643567,
+							RunqueueTime: 479424566378,
+							RunPeriods:   984285,
+						},
 						LoadAverage: 2,
 					},
 					Memory: info.MemoryStats{

diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics
@@ -25,6 +25,15 @@ container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",container
 # HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds.
 # TYPE container_cpu_load_average_10s gauge
 container_cpu_load_average_10s{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2
+# HELP container_cpu_schedstat_run_periods_total Number of times processes of the cgroup have run on the cpu
+# TYPE container_cpu_schedstat_run_periods_total counter
+container_cpu_schedstat_run_periods_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 984285
+# HELP container_cpu_schedstat_run_seconds_total Time duration the processes of the container have run on the CPU.
+# TYPE container_cpu_schedstat_run_seconds_total counter
+container_cpu_schedstat_run_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.053643567
+# HELP container_cpu_schedstat_runqueue_seconds_total Time duration processes of the container have been waiting on a runqueue.
+# TYPE container_cpu_schedstat_runqueue_seconds_total counter
+container_cpu_schedstat_runqueue_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 479.424566378
 # HELP container_cpu_system_seconds_total Cumulative system cpu time consumed in seconds.
 # TYPE container_cpu_system_seconds_total counter
 container_cpu_system_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 7e-09