Skip to content

Commit

Permalink
Merge pull request google#2603 from dqminh/prometheus-on-demand
Browse files Browse the repository at this point in the history
Allow on-demand metrics collection for prometheus
  • Loading branch information
dashpole authored Jul 6, 2020
2 parents 9ec2495 + 20e306a commit 0587e3d
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 36 deletions.
17 changes: 13 additions & 4 deletions cmd/internal/api/versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ import (
"net/http"
"path"
"strconv"
"time"

info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/info/v2"
v2 "github.com/google/cadvisor/info/v2"
"github.com/google/cadvisor/manager"

"k8s.io/klog/v2"
Expand Down Expand Up @@ -313,7 +314,7 @@ func (api *version2_0) SupportedRequestTypes() []string {
}

func (api *version2_0) HandleRequest(requestType string, request []string, m manager.Manager, w http.ResponseWriter, r *http.Request) error {
opt, err := getRequestOptions(r)
opt, err := GetRequestOptions(r)
if err != nil {
return err
}
Expand Down Expand Up @@ -482,7 +483,7 @@ func (api *version2_1) SupportedRequestTypes() []string {

func (api *version2_1) HandleRequest(requestType string, request []string, m manager.Manager, w http.ResponseWriter, r *http.Request) error {
// Get the query request.
opt, err := getRequestOptions(r)
opt, err := GetRequestOptions(r)
if err != nil {
return err
}
Expand Down Expand Up @@ -525,7 +526,8 @@ func (api *version2_1) HandleRequest(requestType string, request []string, m man
}
}

func getRequestOptions(r *http.Request) (v2.RequestOptions, error) {
// GetRequestOptions returns the metrics request options from an HTTP request.
func GetRequestOptions(r *http.Request) (v2.RequestOptions, error) {
supportedTypes := map[string]bool{
v2.TypeName: true,
v2.TypeDocker: true,
Expand Down Expand Up @@ -555,5 +557,12 @@ func getRequestOptions(r *http.Request) (v2.RequestOptions, error) {
if recursive == "true" {
opt.Recursive = true
}
if maxAgeString := r.URL.Query().Get("max_age"); len(maxAgeString) > 0 {
maxAge, err := time.ParseDuration(maxAgeString)
if err != nil {
return opt, fmt.Errorf("failed to parse 'max_age' option: %v", err)
}
opt.MaxAge = &maxAge
}
return opt, nil
}
30 changes: 22 additions & 8 deletions cmd/internal/http/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,28 @@ func RegisterHandlers(mux httpmux.Mux, containerManager manager.Manager, httpAut
// the provided HTTP mux to handle the given Prometheus endpoint.
func RegisterPrometheusHandler(mux httpmux.Mux, resourceManager manager.Manager, prometheusEndpoint string,
f metrics.ContainerLabelsFunc, includedMetrics container.MetricSet) {
r := prometheus.NewRegistry()
r.MustRegister(
metrics.NewPrometheusCollector(resourceManager, f, includedMetrics, clock.RealClock{}),
metrics.NewPrometheusMachineCollector(resourceManager, includedMetrics),
prometheus.NewGoCollector(),
prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}),
)
mux.Handle(prometheusEndpoint, promhttp.HandlerFor(r, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}))
goCollector := prometheus.NewGoCollector()
processCollector := prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{})
machineCollector := metrics.NewPrometheusMachineCollector(resourceManager, includedMetrics)

mux.Handle(prometheusEndpoint, http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
opts, err := api.GetRequestOptions(req)
if err != nil {
http.Error(w, "No metrics gathered, last error:\n\n"+err.Error(), http.StatusInternalServerError)
return
}
opts.Count = 1 // we only want the latest datapoint
opts.Recursive = true // get all child containers

r := prometheus.NewRegistry()
r.MustRegister(
metrics.NewPrometheusCollector(resourceManager, f, includedMetrics, clock.RealClock{}, opts),
machineCollector,
goCollector,
processCollector,
)
promhttp.HandlerFor(r, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}).ServeHTTP(w, req)
}))
}

func staticHandlerNoAuth(w http.ResponseWriter, r *http.Request) {
Expand Down
2 changes: 1 addition & 1 deletion info/v2/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (

// TODO(rjnagal): Remove dependency after moving all stats structs from v1.
// using v1 now for easy conversion.
"github.com/google/cadvisor/info/v1"
v1 "github.com/google/cadvisor/info/v1"
)

const (
Expand Down
2 changes: 1 addition & 1 deletion manager/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
"github.com/google/cadvisor/collector"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/info/v2"
v2 "github.com/google/cadvisor/info/v2"
"github.com/google/cadvisor/stats"
"github.com/google/cadvisor/summary"
"github.com/google/cadvisor/utils/cpuload"
Expand Down
6 changes: 3 additions & 3 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"time"

info "github.com/google/cadvisor/info/v1"
v2 "github.com/google/cadvisor/info/v2"
)

// metricValue describes a single metric value for a given set of label values
Expand All @@ -32,9 +33,8 @@ type metricValues []metricValue

// infoProvider will usually be manager.Manager, but can be swapped out for testing.
type infoProvider interface {
// SubcontainersInfo provides information about all subcontainers of the
// specified container including itself.
SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
// GetRequestedContainersInfo gets info for all requested containers based on the request options.
GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error)
// GetVersionInfo provides information about the version.
GetVersionInfo() (*info.VersionInfo, error)
// GetMachineInfo provides information about the machine.
Expand Down
7 changes: 5 additions & 2 deletions metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
v2 "github.com/google/cadvisor/info/v2"

"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog/v2"
Expand Down Expand Up @@ -97,13 +98,14 @@ type PrometheusCollector struct {
containerMetrics []containerMetric
containerLabelsFunc ContainerLabelsFunc
includedMetrics container.MetricSet
opts v2.RequestOptions
}

// NewPrometheusCollector returns a new PrometheusCollector. The passed
// ContainerLabelsFunc specifies which base labels will be attached to all
// exported metrics. If left to nil, the DefaultContainerLabels function
// will be used instead.
func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet, now clock.Clock) *PrometheusCollector {
func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet, now clock.Clock, opts v2.RequestOptions) *PrometheusCollector {
if f == nil {
f = DefaultContainerLabels
}
Expand All @@ -129,6 +131,7 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
},
includedMetrics: includedMetrics,
opts: opts,
}
if includedMetrics.Has(container.CpuUsageMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
Expand Down Expand Up @@ -1780,7 +1783,7 @@ func BaseContainerLabels(whiteList []string) func(container *info.ContainerInfo)
}

func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) {
containers, err := c.infoProvider.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1})
containers, err := c.infoProvider.GetRequestedContainersInfo("/", c.opts)
if err != nil {
c.errors.Set(1)
klog.Warningf("Couldn't get containers: %s", err)
Expand Down
15 changes: 8 additions & 7 deletions metrics/prometheus_fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"time"

info "github.com/google/cadvisor/info/v1"
v2 "github.com/google/cadvisor/info/v2"
)

type testSubcontainersInfoProvider struct{}
Expand Down Expand Up @@ -264,9 +265,9 @@ func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, erro
}, nil
}

func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
return []*info.ContainerInfo{
{
func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
return map[string]*info.ContainerInfo{
"testcontainer": {
ContainerReference: info.ContainerReference{
Name: "testcontainer",
Aliases: []string{"testcontaineralias"},
Expand Down Expand Up @@ -710,10 +711,10 @@ func (p *erroringSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo,
return p.successfulProvider.GetMachineInfo()
}

func (p *erroringSubcontainersInfoProvider) SubcontainersInfo(
a string, r *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
func (p *erroringSubcontainersInfoProvider) GetRequestedContainersInfo(
a string, opt v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
if p.shouldFail {
return []*info.ContainerInfo{}, errors.New("Oops 3")
return map[string]*info.ContainerInfo{}, errors.New("Oops 3")
}
return p.successfulProvider.SubcontainersInfo(a, r)
return p.successfulProvider.GetRequestedContainersInfo(a, opt)
}
36 changes: 26 additions & 10 deletions metrics/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@
package metrics

import (
"errors"
"os"
"testing"
"time"

"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
v2 "github.com/google/cadvisor/info/v2"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
Expand All @@ -35,7 +37,7 @@ func TestPrometheusCollector(t *testing.T) {
s := DefaultContainerLabels(container)
s["zone.name"] = "hello"
return s
}, container.AllMetrics, now)
}, container.AllMetrics, now, v2.RequestOptions{})
reg := prometheus.NewRegistry()
reg.MustRegister(c)

Expand Down Expand Up @@ -64,7 +66,7 @@ func TestPrometheusCollector_scrapeFailure(t *testing.T) {
s := DefaultContainerLabels(container)
s["zone.name"] = "hello"
return s
}, container.AllMetrics, now)
}, container.AllMetrics, now, v2.RequestOptions{})
reg := prometheus.NewRegistry()
reg.MustRegister(c)

Expand All @@ -76,7 +78,7 @@ func TestPrometheusCollector_scrapeFailure(t *testing.T) {
}

func TestNewPrometheusCollectorWithPerf(t *testing.T) {
c := NewPrometheusCollector(mockInfoProvider{}, mockLabelFunc, container.MetricSet{container.PerfMetrics: struct{}{}}, now)
c := NewPrometheusCollector(&mockInfoProvider{}, mockLabelFunc, container.MetricSet{container.PerfMetrics: struct{}{}}, now, v2.RequestOptions{})
assert.Len(t, c.containerMetrics, 5)
names := []string{}
for _, m := range c.containerMetrics {
Expand All @@ -89,18 +91,32 @@ func TestNewPrometheusCollectorWithPerf(t *testing.T) {
assert.Contains(t, names, "container_perf_uncore_events_scaling_ratio")
}

type mockInfoProvider struct{}
// TestNewPrometheusCollectorWithRequestOptions verifies that the request
// options given to NewPrometheusCollector are the ones the collector hands to
// the info provider's GetRequestedContainersInfo during Collect.
func TestNewPrometheusCollectorWithRequestOptions(t *testing.T) {
	p := mockInfoProvider{}
	opts := v2.RequestOptions{
		IdType: "docker",
	}
	c := NewPrometheusCollector(&p, mockLabelFunc, container.AllMetrics, now, opts)
	// Buffered so Collect can send without a concurrent reader.
	// NOTE(review): assumes Collect emits at most 10 metrics with this mock — confirm.
	ch := make(chan prometheus.Metric, 10)
	c.Collect(ch)
	// assert.Equal takes (expected, actual); keeping that order makes the
	// failure output label the two values correctly.
	assert.Equal(t, opts, p.options)
}

// mockInfoProvider is a test stand-in for infoProvider that remembers the
// request options it was asked for.
type mockInfoProvider struct {
	// options is overwritten with the RequestOptions argument on every
	// GetRequestedContainersInfo call.
	options v2.RequestOptions
}

func (m mockInfoProvider) SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
return nil, nil
func (m *mockInfoProvider) GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
m.options = options
return map[string]*info.ContainerInfo{}, nil
}

func (m mockInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
return nil, nil
func (m *mockInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
return nil, errors.New("not supported")
}

func (m mockInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
return nil, nil
func (m *mockInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
return nil, errors.New("not supported")
}

func mockLabelFunc(*info.ContainerInfo) map[string]string {
Expand Down

0 comments on commit 0587e3d

Please sign in to comment.