Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
7bb8b34
add raw_cgroup_prefix_whitelist flag and fix issue #2129
viberan Feb 3, 2019
1032888
Merge pull request #2164 from viberan/master
dashpole Feb 19, 2019
046818d
fs: get inodes and disk usage via pure go
Feb 15, 2019
05529e2
Merge pull request #2171 from namreg/replace-du-and-find
dashpole Feb 25, 2019
491c0ed
v0.33.0 Changelog
dashpole Feb 27, 2019
511ec9e
Merge pull request #2180 from dashpole/v0.33_changelog
dashpole Feb 27, 2019
2522da0
Always collect disk stats for rootfs.
Random-Liu Mar 7, 2019
7e9ea00
Merge pull request #2191 from Random-Liu/always-rootfs-disk-stats
dashpole Mar 7, 2019
28d11ad
Fix stats in LXD with ZFS storage pool (#2189)
ktsakalozos Mar 12, 2019
1e13a85
container: crio: Return more informative error
haircommander Mar 21, 2019
150c78b
Make the containerd factory configurable
0902horn Mar 22, 2019
50076da
Merge pull request #2201 from haircommander/info_error_fix
dashpole Mar 22, 2019
52f7d1d
Merge pull request #2203 from 0902horn/configurable-containerd-factory
dashpole Mar 26, 2019
23bfbb9
fix typo
mas9612 Apr 1, 2019
68532f6
Merge pull request #2211 from mas9612/fix-documentation
dashpole Apr 1, 2019
c492e4a
Remove CloudProvider SDK dependencies
tallclair Apr 4, 2019
4ca6c21
Merge pull request #2213 from tallclair/cloud
dashpole Apr 4, 2019
40e6acb
Reorganize code to allow conditional enablement of runtimes
dims Mar 29, 2019
1276700
Merge pull request #2209 from dims/conditional-registration-of-runtimes
dashpole Apr 5, 2019
f7b5092
Restore manager.New signature, initialization code
liggitt Apr 9, 2019
58c04bc
Support multiple storage backends
choury Apr 9, 2019
e24fd90
Move auto-registration to explicit install packages, register plugin …
liggitt Apr 9, 2019
7dc4594
Add InitializeFSContext hook to plugins
liggitt Apr 9, 2019
6757727
Split rkt context initialization
liggitt Apr 9, 2019
e9a44a2
Split crio context initialization
liggitt Apr 9, 2019
a022fa7
Split docker context initialization
liggitt Apr 9, 2019
f8a73e0
Move fs.Context to types.go
liggitt Apr 10, 2019
9db8c7d
Merge pull request #2217 from liggitt/plugin-init-hooks
tallclair Apr 11, 2019
9620b8c
Merge pull request #2216 from choury/master
dashpole Apr 26, 2019
6730952
Update GLIBC_VERSION and alpine Version
abelgana Apr 29, 2019
10f73b2
Merge pull request #2230 from abelgana/master
dashpole Apr 29, 2019
18566a5
add manifests for rbac and psps
george-angel May 1, 2019
fe29c51
Merge pull request #2233 from george-angel/master
dashpole May 2, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

### 0.33.0 (2019-02-26)
- Add --raw_cgroup_prefix_whitelist flag to allow configuring which raw cgroup trees cAdvisor monitors
- Replace `du` and `find` with a golang implementation
- Periodically update MachineInfo to support hot-add/remove
- Add explicit timestamps to prometheus metrics to fix rate calculations
- Add --url_base_prefix flag to provide better support for reverse proxies
- Add --whitelisted_container_labels flag to allow specifying the container labels added as prometheus labels

### 0.32.0 (2018-11-12)
- Add container process and file descriptor metrics (disabled by default)
- Rename `type` label to `failure_type` for prometheus `memory_failures_total` metric
Expand Down
8 changes: 4 additions & 4 deletions cache/memory/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ type InMemoryCache struct {
lock sync.RWMutex
containerCacheMap map[string]*containerCache
maxAge time.Duration
backend storage.StorageDriver
backend []storage.StorageDriver
}

func (self *InMemoryCache) AddStats(cInfo *info.ContainerInfo, stats *info.ContainerStats) error {
Expand All @@ -86,11 +86,11 @@ func (self *InMemoryCache) AddStats(cInfo *info.ContainerInfo, stats *info.Conta
}
}()

if self.backend != nil {
for _, backend := range self.backend {
// TODO(monnand): To deal with long delay write operations, we
// may want to start a pool of goroutines to do write
// operations.
if err := self.backend.AddStats(cInfo, stats); err != nil {
if err := backend.AddStats(cInfo, stats); err != nil {
klog.Error(err)
}
}
Expand Down Expand Up @@ -131,7 +131,7 @@ func (self *InMemoryCache) RemoveContainer(containerName string) error {

func New(
maxAge time.Duration,
backend storage.StorageDriver,
backend []storage.StorageDriver,
) *InMemoryCache {
ret := &InMemoryCache{
containerCacheMap: make(map[string]*containerCache, 32),
Expand Down
14 changes: 11 additions & 3 deletions cadvisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package main

import (
"crypto/tls"
"flag"
"fmt"
"net/http"
Expand All @@ -29,12 +30,17 @@ import (
"github.com/google/cadvisor/container"
cadvisorhttp "github.com/google/cadvisor/http"
"github.com/google/cadvisor/manager"
"github.com/google/cadvisor/metrics"
"github.com/google/cadvisor/utils/sysfs"
"github.com/google/cadvisor/version"

"crypto/tls"
// Register container providers
_ "github.com/google/cadvisor/container/install"

"github.com/google/cadvisor/metrics"
// Register CloudProviders
_ "github.com/google/cadvisor/utils/cloudinfo/aws"
_ "github.com/google/cadvisor/utils/cloudinfo/azure"
_ "github.com/google/cadvisor/utils/cloudinfo/gce"

"k8s.io/klog"
)
Expand Down Expand Up @@ -65,6 +71,8 @@ var whitelistedContainerLabels = flag.String("whitelisted_container_labels", "",

var urlBasePrefix = flag.String("url_base_prefix", "", "prefix path that will be prepended to all paths to support some reverse proxies")

var rawCgroupPrefixWhiteList = flag.String("raw_cgroup_prefix_whitelist", "", "A comma-separated list of cgroup path prefix that needs to be collected even when -docker_only is specified")

var (
// Metrics to be ignored.
// Tcp metrics are ignored by default.
Expand Down Expand Up @@ -145,7 +153,7 @@ func main() {

collectorHttpClient := createCollectorHttpClient(*collectorCert, *collectorKey)

containerManager, err := manager.New(memoryStorage, sysFs, *maxHousekeepingInterval, *allowDynamicHousekeeping, includedMetrics, &collectorHttpClient, []string{"/"})
containerManager, err := manager.New(memoryStorage, sysFs, *maxHousekeepingInterval, *allowDynamicHousekeeping, includedMetrics, &collectorHttpClient, strings.Split(*rawCgroupPrefixWhiteList, ","))
if err != nil {
klog.Fatalf("Failed to create a Container Manager: %s", err)
}
Expand Down
4 changes: 2 additions & 2 deletions container/common/container_hints_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func TestGetContainerHintsFromFile(t *testing.T) {

if cHints.AllHosts[0].NetworkInterface.VethHost != "veth24031eth1" &&
cHints.AllHosts[0].NetworkInterface.VethChild != "eth1" {
t.Errorf("Cannot find network interface in %s", cHints)
t.Errorf("Cannot find network interface in %+v", cHints)
}

correctMountDirs := [...]string{
Expand All @@ -44,7 +44,7 @@ func TestGetContainerHintsFromFile(t *testing.T) {

for i, mountDir := range cHints.AllHosts[0].Mounts {
if correctMountDirs[i] != mountDir.HostDir {
t.Errorf("Cannot find mount %s in %s", mountDir.HostDir, cHints)
t.Errorf("Cannot find mount %s in %+v", mountDir.HostDir, cHints)
}
}
}
Expand Down
29 changes: 13 additions & 16 deletions container/common/fsHandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ type realFsHandler struct {
}

const (
timeout = 2 * time.Minute
maxBackoffFactor = 20
)

Expand All @@ -74,36 +73,34 @@ func NewFsHandler(period time.Duration, rootfs, extraDir string, fsInfo fs.FsInf

func (fh *realFsHandler) update() error {
var (
baseUsage, extraDirUsage, inodeUsage uint64
rootDiskErr, rootInodeErr, extraDiskErr error
rootUsage, extraUsage fs.UsageInfo
rootErr, extraErr error
)
// TODO(vishh): Add support for external mounts.
if fh.rootfs != "" {
baseUsage, rootDiskErr = fh.fsInfo.GetDirDiskUsage(fh.rootfs, timeout)
inodeUsage, rootInodeErr = fh.fsInfo.GetDirInodeUsage(fh.rootfs, timeout)
rootUsage, rootErr = fh.fsInfo.GetDirUsage(fh.rootfs)
}

if fh.extraDir != "" {
extraDirUsage, extraDiskErr = fh.fsInfo.GetDirDiskUsage(fh.extraDir, timeout)
extraUsage, extraErr = fh.fsInfo.GetDirUsage(fh.extraDir)
}

// Wait to handle errors until after all operations are run.
// An error in one will not cause an early return, skipping others
fh.Lock()
defer fh.Unlock()
fh.lastUpdate = time.Now()
if rootInodeErr == nil && fh.rootfs != "" {
fh.usage.InodeUsage = inodeUsage
if fh.rootfs != "" && rootErr == nil {
fh.usage.InodeUsage = rootUsage.Inodes
fh.usage.TotalUsageBytes = rootUsage.Bytes + extraUsage.Bytes
}
if rootDiskErr == nil && fh.rootfs != "" {
fh.usage.TotalUsageBytes = baseUsage + extraDirUsage
}
if extraDiskErr == nil && fh.extraDir != "" {
fh.usage.BaseUsageBytes = baseUsage
if fh.extraDir != "" && extraErr == nil {
fh.usage.BaseUsageBytes = rootUsage.Bytes
}

// Combine errors into a single error to return
if rootDiskErr != nil || rootInodeErr != nil || extraDiskErr != nil {
return fmt.Errorf("rootDiskErr: %v, rootInodeErr: %v, extraDiskErr: %v", rootDiskErr, rootInodeErr, extraDiskErr)
if rootErr != nil || extraErr != nil {
return fmt.Errorf("rootDiskErr: %v, extraDiskErr: %v", rootErr, extraErr)
}
return nil
}
Expand Down Expand Up @@ -132,7 +129,7 @@ func (fh *realFsHandler) trackUsage() {
// if the long duration is persistent either because of slow
// disk or lots of containers.
longOp = longOp + time.Second
klog.V(2).Infof("du and find on following dirs took %v: %v; will not log again for this container unless duration exceeds %v", duration, []string{fh.rootfs, fh.extraDir}, longOp)
klog.V(2).Infof("fs: disk usage and inodes count on following dirs took %v: %v; will not log again for this container unless duration exceeds %v", duration, []string{fh.rootfs, fh.extraDir}, longOp)
}
}
}
Expand Down
10 changes: 2 additions & 8 deletions container/containerd/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ import (
"google.golang.org/grpc"
)

const (
// k8sNamespace is the namespace we use to connect containerd.
k8sNamespace = "k8s.io"
)

type client struct {
containerService containersapi.ContainersClient
taskService tasksapi.TasksClient
Expand All @@ -52,13 +47,12 @@ var once sync.Once
var ctrdClient containerdClient = nil

const (
address = "/run/containerd/containerd.sock"
maxBackoffDelay = 3 * time.Second
connectionTimeout = 2 * time.Second
)

// Client creates a containerd client
func Client() (containerdClient, error) {
func Client(address, namespace string) (containerdClient, error) {
var retErr error
once.Do(func() {
tryConn, err := net.DialTimeout("unix", address, connectionTimeout)
Expand All @@ -75,7 +69,7 @@ func Client() (containerdClient, error) {
grpc.WithBackoffMaxDelay(maxBackoffDelay),
grpc.WithTimeout(connectionTimeout),
}
unary, stream := newNSInterceptors(k8sNamespace)
unary, stream := newNSInterceptors(namespace)
gopts = append(gopts,
grpc.WithUnaryInterceptor(unary),
grpc.WithStreamInterceptor(stream),
Expand Down
9 changes: 5 additions & 4 deletions container/containerd/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ import (
"github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/manager/watcher"
"github.com/google/cadvisor/watcher"
)

var ArgContainerdEndpoint = flag.String("containerd", "unix:///var/run/containerd.sock", "containerd endpoint")
var ArgContainerdEndpoint = flag.String("containerd", "/run/containerd/containerd.sock", "containerd endpoint")
var ArgContainerdNamespace = flag.String("containerd-namespace", "k8s.io", "containerd namespace")

// The namespace under which containerd aliases are unique.
const k8sContainerdNamespace = "containerd"
Expand All @@ -56,7 +57,7 @@ func (self *containerdFactory) String() string {
}

func (self *containerdFactory) NewContainerHandler(name string, inHostNamespace bool) (handler container.ContainerHandler, err error) {
client, err := Client()
client, err := Client(*ArgContainerdEndpoint, *ArgContainerdNamespace)
if err != nil {
return
}
Expand Down Expand Up @@ -118,7 +119,7 @@ func (self *containerdFactory) DebugInfo() map[string][]string {

// Register root container before running this function!
func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo, includedMetrics container.MetricSet) error {
client, err := Client()
client, err := Client(*ArgContainerdEndpoint, *ArgContainerdNamespace)
if err != nil {
return fmt.Errorf("unable to create containerd client: %v", err)
}
Expand Down
29 changes: 29 additions & 0 deletions container/containerd/install/install.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2019 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package install registers containerd.NewPlugin() as the "containerd" container provider when imported.
package install

import (
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/containerd"
"k8s.io/klog"
)

// init runs when this package is imported and registers the containerd plugin
// with the container package under the name "containerd". A registration
// failure is reported through klog.Fatalf.
func init() {
	if err := container.RegisterPlugin("containerd", containerd.NewPlugin()); err != nil {
		klog.Fatalf("Failed to register containerd plugin: %v", err)
	}
}
38 changes: 38 additions & 0 deletions container/containerd/plugin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2019 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package containerd

import (
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/watcher"
)

// NewPlugin builds the containerd implementation of container.Plugin; hand the
// result to container.RegisterPlugin() to make the provider available.
func NewPlugin() container.Plugin {
	return new(plugin)
}

// plugin is the value handed out by NewPlugin as a container.Plugin.
// It has no fields, so it carries no state of its own.
type plugin struct{}

// InitializeFSContext is a no-op for this plugin: the supplied fs.Context is
// left untouched and the returned error is always nil.
func (*plugin) InitializeFSContext(context *fs.Context) error {
	return nil
}

// Register forwards its arguments to the package-level Register function.
// The watcher result is always nil; the error is whatever that call reports.
func (*plugin) Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo, includedMetrics container.MetricSet) (watcher.ContainerWatcher, error) {
	return nil, Register(factory, fsInfo, includedMetrics)
}
7 changes: 7 additions & 0 deletions container/crio/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ func (c *crioClientImpl) ContainerInfo(id string) (*ContainerInfo, error) {
return nil, err
}
defer resp.Body.Close()

// golang's http.Do doesn't return an error if non 200 response code is returned
// handle this case here, rather than failing to decode the body
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("Error finding container %s: Status %d returned error %s", id, resp.StatusCode, resp.Body)
}

cInfo := ContainerInfo{}
if err := json.NewDecoder(resp.Body).Decode(&cInfo); err != nil {
return nil, err
Expand Down
2 changes: 1 addition & 1 deletion container/crio/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/manager/watcher"
"github.com/google/cadvisor/watcher"

"k8s.io/klog"
)
Expand Down
29 changes: 29 additions & 0 deletions container/crio/install/install.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2019 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package install registers crio.NewPlugin() as the "crio" container provider when imported.
package install

import (
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/crio"
"k8s.io/klog"
)

// init runs when this package is imported and registers the crio plugin with
// the container package under the name "crio". A registration failure is
// reported through klog.Fatalf.
func init() {
	if err := container.RegisterPlugin("crio", crio.NewPlugin()); err != nil {
		klog.Fatalf("Failed to register crio plugin: %v", err)
	}
}
Loading