Skip to content

Commit 7b0ffaf

Browse files
authored
Tink worker image pull retry (#122)
## Description

Image pull retry for the tink-worker image.

## Why is this needed

There can be races where the linuxkit network or DNS has not been fully set up and is not yet functional, and the image pull fails because of that.

Fixes: #

Implement an image pull retry loop for the tink-worker image. This loop attempts an image pull every 5 seconds, up to 10 times.

## How Has This Been Tested?

Tested by re-running a full provisioning workflow on a bare-metal server and simulating image pull failures artificially to exercise the retry logic.

## How are existing users impacted? What migration steps/scripts do we need?

## Checklist:

I have:

- [ ] updated the documentation and/or roadmap (if required)
- [ ] added unit or e2e tests
- [ ] provided instructions on how to upgrade
2 parents 3aeed5e + 6a6ac91 commit 7b0ffaf

File tree

3 files changed

+28
-4
lines changed

3 files changed

+28
-4
lines changed

hook-bootkit/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ require github.com/docker/docker v20.10.2+incompatible
66

77
require (
88
github.com/Microsoft/go-winio v0.4.16 // indirect
9+
github.com/cenkalti/backoff/v4 v4.1.3 // indirect
910
github.com/containerd/containerd v1.4.3 // indirect
1011
github.com/docker/distribution v2.7.1+incompatible // indirect
1112
github.com/docker/go-connections v0.4.0 // indirect

hook-bootkit/go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@ github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg6
44
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
55
github.com/Microsoft/go-winio v0.4.16 h1:FtSW/jqD+l4ba5iPBj9CODVtgfYAD8w2wS923g/cFDk=
66
github.com/Microsoft/go-winio v0.4.16/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0=
7+
github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4=
8+
github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=
79
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
810
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
911
github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
1012
github.com/containerd/containerd v1.4.3 h1:ijQT13JedHSHrQGWFcGEwzcNKrAGIiZ+jSD5QQG07SY=
1113
github.com/containerd/containerd v1.4.3/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA=
14+
github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw=
1215
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
1316
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
1417
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@@ -153,6 +156,7 @@ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2
153156
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
154157
google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
155158
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
159+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
156160
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
157161
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
158162
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

hook-bootkit/main.go

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"strings"
1414
"time"
1515

16+
"github.com/cenkalti/backoff/v4"
1617
"github.com/docker/docker/api/types"
1718
"github.com/docker/docker/api/types/container"
1819
"github.com/docker/docker/api/types/mount"
@@ -46,6 +47,8 @@ type tinkConfig struct {
4647
tinkServerTLS string
4748
}
4849

50+
const maxRetryAttempts = 20
51+
4952
func main() {
5053
fmt.Println("Starting BootKit")
5154

@@ -144,16 +147,32 @@ func main() {
144147

145148
fmt.Printf("Pulling image [%s]", imageName)
146149

147-
out, err := cli.ImagePull(ctx, imageName, pullOpts)
148-
if err != nil {
150+
// TODO: Ideally if this function becomes a loop that runs forever and keeps retrying
151+
// anything that failed, this retry would not be needed. For now, this addresses the specific
152+
// race condition case of when the linuxkit network or dns is in the process of, but not quite
153+
// fully set up yet.
154+
155+
var out io.ReadCloser
156+
imagePullOperation := func() error {
157+
out, err = cli.ImagePull(ctx, imageName, pullOpts)
158+
if err != nil {
159+
fmt.Printf("Image pull failure %s, %v\n", imageName, err)
160+
return err
161+
}
162+
return nil
163+
}
164+
if err = backoff.Retry(imagePullOperation, backoff.WithMaxRetries(backoff.NewExponentialBackOff(), maxRetryAttempts)); err != nil {
149165
panic(err)
150166
}
151167

152-
_, err = io.Copy(os.Stdout, out)
153-
if err != nil {
168+
if _, err = io.Copy(os.Stdout, out); err != nil {
154169
panic(err)
155170
}
156171

172+
if err = out.Close(); err != nil {
173+
fmt.Printf("error closing io.ReadCloser out: %s", err)
174+
}
175+
157176
resp, err := cli.ContainerCreate(ctx, tinkContainer, tinkHostConfig, nil, nil, "")
158177
if err != nil {
159178
panic(err)

0 commit comments

Comments
 (0)