Skip to content

Commit 0735ed3

Browse files
authored
backport: CSI: skip node unpublish on GC'd or down nodes (#14720)
1 parent 0a29959 commit 0735ed3

File tree

2 files changed

+19
-0
lines changed

2 files changed

+19
-0
lines changed

.changelog/14720.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:bug
2+
csi: Fixed a bug where volume claims on lost or garbage-collected nodes could not be freed
3+
```

nomad/csi_endpoint.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,22 @@ func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.C
636636
return err
637637
}
638638

639+
// If the node has been GC'd or is down, we can't send it a node
640+
// unpublish. We need to assume the node has unpublished on its
641+
// end. If it hasn't, any controller unpublish will potentially
642+
// hang or error and need to be retried.
643+
if claim.NodeID != "" {
644+
node, err := snap.NodeByID(memdb.NewWatchSet(), claim.NodeID)
645+
if err != nil {
646+
return err
647+
}
648+
if node == nil || node.Status == structs.NodeStatusDown {
649+
v.logger.Debug("skipping node unpublish for down or GC'd node")
650+
claim.State = structs.CSIVolumeClaimStateNodeDetached
651+
return v.checkpointClaim(vol, claim)
652+
}
653+
}
654+
639655
if claim.AllocationID != "" {
640656
err := v.nodeUnpublishVolumeImpl(vol, claim)
641657
if err != nil {

0 commit comments

Comments
 (0)