Skip to content

Commit

Permalink
Merge pull request #121 from topolvm/controller/dont-resize-if-failed…
Browse files Browse the repository at this point in the history
…-to-get-some-metrics

reconcile: do not resize volume if failed to get inode stats
  • Loading branch information
satoru-takeuchi authored Jun 24, 2022
2 parents 1e9b4ec + 35410dd commit 2cb7577
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 9 deletions.
12 changes: 8 additions & 4 deletions e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,19 @@ TEST_KUBERNETES_TARGET ?= current

ifeq ($(TEST_KUBERNETES_TARGET),current)
KUBERNETES_VERSION := 1.23.3
TOPOLVM_VERSION := 0.11.1
# In This commit, flakiness of CI due to webhook readiness is addressed.
# todo: Use tag if this commit is released
TOPOLVM_VERSION := 31f8d5329c7e518240fb33681245e43f2d3491f3
else ifeq ($(TEST_KUBERNETES_TARGET),prev)
KUBERNETES_VERSION := 1.22.4
TOPOLVM_VERSION := 0.11.1
# In This commit, flakiness of CI due to webhook readiness is addressed.
# todo: Use tag if this commit is released
TOPOLVM_VERSION := 31f8d5329c7e518240fb33681245e43f2d3491f3
else ifeq ($(TEST_KUBERNETES_TARGET),prev2)
KUBERNETES_VERSION := 1.21.2
# Kubernetes 1.21 does not support KubeSchedulerConfiguration API version v1beta2,
# so use older version of TopoLVM
TOPOLVM_VERSION := 0.10.4
TOPOLVM_VERSION := v0.10.4
endif
export KUBERNETES_VERSION

Expand Down Expand Up @@ -76,7 +80,7 @@ endef

$(TMPDIR)/topolvm:
git clone https://github.com/topolvm/topolvm.git $@
cd $@ && git checkout v$(TOPOLVM_VERSION)
cd $@ && git checkout $(TOPOLVM_VERSION)
make -C $(TMPDIR)/topolvm/example setup

autoresizer.img:
Expand Down
16 changes: 13 additions & 3 deletions runners/metrics_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ func NewPrometheusClient(url string) (MetricsClient, error) {

// MetricsClient is an interface for getting metrics
type MetricsClient interface {
// GetMetrics returns volume stats metrics of PVCs
//
// The volume stats consist of available bytes, capacity bytes, available inodes and capacity
// inodes. This method returns volume stats for a PVC only if all four metrics of the PVC was
// retrieved from the metrics source.
GetMetrics(ctx context.Context) (map[types.NamespacedName]*VolumeStats, error)
}

Expand All @@ -51,6 +56,7 @@ type prometheusClient struct {
prometheusAPI prometheusv1.API
}

// GetMetrics implements MetricsClient.GetMetrics
func (c *prometheusClient) GetMetrics(ctx context.Context) (map[types.NamespacedName]*VolumeStats, error) {
volumeStatsMap := make(map[types.NamespacedName]*VolumeStats)

Expand All @@ -76,16 +82,20 @@ func (c *prometheusClient) GetMetrics(ctx context.Context) (map[types.Namespaced

for key, val := range availableBytes {
vs := &VolumeStats{AvailableBytes: val}
if cb, ok := capacityBytes[key]; !ok {
continue
} else {
if cb, ok := capacityBytes[key]; ok {
vs.CapacityBytes = cb
} else {
continue
}
if ais, ok := availableInodeSize[key]; ok {
vs.AvailableInodeSize = ais
} else {
continue
}
if cis, ok := capacityInodeSize[key]; ok {
vs.CapacityInodeSize = cis
} else {
continue
}
volumeStatsMap[key] = vs
}
Expand Down
15 changes: 13 additions & 2 deletions runners/pvc_autoresizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,11 @@ func (w *pvcAutoresizer) reconcile(ctx context.Context) error {
return nil
}
for _, pvc := range pvcs.Items {
log := w.log.WithValues("namespace", pvc.Namespace, "name", pvc.Name)
isTarget, err := isTargetPVC(&pvc)
if err != nil {
metrics.ResizerFailedResizeTotal.Increment(pvc.Name, pvc.Namespace)
w.log.WithValues("namespace", pvc.Namespace, "name", pvc.Name).Error(err, "failed to check target PVC")
log.Error(err, "failed to check target PVC")
continue
} else if !isTarget {
continue
Expand All @@ -131,12 +132,22 @@ func (w *pvcAutoresizer) reconcile(ctx context.Context) error {
Name: pvc.Name,
}
if _, ok := vsMap[namespacedName]; !ok {
// Do not increment ResizerFailedResizeTotal here. The controller cannot get volume
// stats for "offline" volumes (i.e. volumes not mounted by any pod) since kubelet
// exports volume stats of a persistent volume claim only if it is online. Besides,
// NodeExpandVolume RPC assumes that the volume to be published or staged on a node
// (and hence online), the resize request of controller for offline PVC will not be
// processed for the time being. So, we do not regard it as a resize failure that
// the controller failed to retrieve volume stats for the PVC. This may result in a
// failure to increment the counter in the case which the PVC is online but fails
// to retrieve its metrics, but accept this as a limitation for now.
log.Info("failed to get volume stats")
continue
}
err = w.resize(ctx, &pvc, vsMap[namespacedName])
if err != nil {
metrics.ResizerFailedResizeTotal.Increment(pvc.Name, pvc.Namespace)
w.log.WithValues("namespace", pvc.Namespace, "name", pvc.Name).Error(err, "failed to resize PVC")
log.Error(err, "failed to resize PVC")
}
}
}
Expand Down

0 comments on commit 2cb7577

Please sign in to comment.