From 415b3a736b053ea030974955a3f518784d49a10b Mon Sep 17 00:00:00 2001
From: Alan Clucas
Date: Wed, 24 Jan 2024 14:55:46 +0000
Subject: [PATCH] fix: make etcd errors transient (#12567)

Signed-off-by: Alan Clucas
---
Review notes (commentary only; this text sits after the "---" separator and
before the diff, so it is neither commit message nor patch content):

  * IsTransientErr hunk: the single-line `||` chain is reflowed to one
    predicate per line. The only new operand is isTransientEtcdErr(err),
    placed between apierr.IsServiceUnavailable(err) and
    matchTransientErrPattern(err). All predicates run on the value returned
    by argoerrs.Cause(err). When the combined result is true the error is
    logged with log.Infof("Transient error: %v", err).

  * isTransientEtcdErr: returns true when err.Error() contains
    "etcdserver: leader changed" or "etcdserver: request timed out", and
    false otherwise. The match is on message text, not on error type, so any
    error whose message includes one of these phrases is treated as
    transient.

  * NOTE(review): isTransientEtcdErr calls err.Error() without a nil check.
    The `return false` visible at the top of the first hunk is presumably the
    nil guard in IsTransientErr -- confirm against the full file before
    calling the helper from anywhere else.

  * NOTE(review): the `if ... return true / else if ... return true /
    return false` chain is equivalent to a single `return a || b` over one
    err.Error() call. It is left untouched here so the hunk line counts and
    the diffstat stay consistent with the patch as written.

 util/errors/errors.go | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/util/errors/errors.go b/util/errors/errors.go
index 156db1d0308a..496176070128 100644
--- a/util/errors/errors.go
+++ b/util/errors/errors.go
@@ -28,7 +28,15 @@ func IsTransientErr(err error) bool {
 		return false
 	}
 	err = argoerrs.Cause(err)
-	isTransient := isExceededQuotaErr(err) || apierr.IsTooManyRequests(err) || isResourceQuotaConflictErr(err) || isResourceQuotaTimeoutErr(err) || isTransientNetworkErr(err) || apierr.IsServerTimeout(err) || apierr.IsServiceUnavailable(err) || matchTransientErrPattern(err) ||
+	isTransient := isExceededQuotaErr(err) ||
+		apierr.IsTooManyRequests(err) ||
+		isResourceQuotaConflictErr(err) ||
+		isResourceQuotaTimeoutErr(err) ||
+		isTransientNetworkErr(err) ||
+		apierr.IsServerTimeout(err) ||
+		apierr.IsServiceUnavailable(err) ||
+		isTransientEtcdErr(err) ||
+		matchTransientErrPattern(err) ||
 		errors.Is(err, NewErrTransient(""))
 	if isTransient {
 		log.Infof("Transient error: %v", err)
@@ -61,6 +69,16 @@ func isResourceQuotaTimeoutErr(err error) bool {
 	return apierr.IsInternalError(err) && strings.Contains(err.Error(), "resource quota evaluation timed out")
 }
 
+func isTransientEtcdErr(err error) bool {
+	// Some clusters expose these (transient) etcd errors to the caller
+	if strings.Contains(err.Error(), "etcdserver: leader changed") {
+		return true
+	} else if strings.Contains(err.Error(), "etcdserver: request timed out") {
+		return true
+	}
+	return false
+}
+
 func isTransientNetworkErr(err error) bool {
 	switch err.(type) {
 	case *net.DNSError, *net.OpError, net.UnknownNetworkError: