Skip to content

Commit

Permalink
Fix retry on gateway timeout (kubeflow#306) (kubeflow#398)
Browse files Browse the repository at this point in the history
* It looks like ApiException doesn't always store the HTTP status code in .status;
   in some cases it is stored in the response body instead.
  • Loading branch information
jlewi authored and k8s-ci-robot committed May 16, 2019
1 parent 504a82a commit 3e7a1ee
Showing 1 changed file with 29 additions and 9 deletions.
38 changes: 29 additions & 9 deletions py/kubeflow/testing/argo_client.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Some utility functions for working with TfJobs."""

import datetime
import json
import logging
from retrying import retry
import six
import time

from kubernetes import client as k8s_client
Expand Down Expand Up @@ -35,17 +37,35 @@ def log_status(workflow):
# HTTP 504 (Gateway Timeout); handle_retriable_exception treats this status
# as retriable.
GATEWAY_TIMEOUT = 504

def _get_api_exception_code(exception):
    """Return the HTTP status code carried by an ApiException.

    ApiException may store the code in ``.status`` or only inside the HTTP
    response body, see:
    https://github.com/kubernetes-client/python/blob/5e512ff564c244c50cab780d821542ed56aa965a/kubernetes/client/rest.py#L289 # pylint: disable=line-too-long

    The body is consulted first. If it is missing, is not valid JSON, is not
    a JSON object, or has no "code" entry, ``exception.status`` is used
    instead, so that e.g. a gateway timeout answered with a non-JSON body
    is still recognised.

    Args:
      exception: The ApiException to inspect.

    Returns:
      The status code found in the body, otherwise ``exception.status``
      (which may itself be None).
    """
    body = exception.body
    if body and isinstance(body, six.string_types):
        logging.info("Parsing ApiException body: %s", body)
        try:
            body = json.loads(body)
        except ValueError as e:
            # json.JSONDecodeError subclasses ValueError on Python 3 and does
            # not exist on Python 2, so ValueError is the portable choice.
            logging.error("Error parsing body: %s", e)
            body = None

    # Only a mapping can carry a "code" entry; decoded JSON may also be a
    # list, string or number.
    code = body.get("code") if isinstance(body, dict) else None
    if code is None:
        code = exception.status
    return code


def handle_retriable_exception(exception):
    """Decide whether an operation that raised `exception` should be retried.

    For an ApiException whose code is UNAUTHORIZED, FORBIDDEN or
    GATEWAY_TIMEOUT the kube config is reloaded before reporting the
    exception as retriable.

    NOTE(review): presumably used as a retry predicate (the module imports
    `retry` from `retrying`) -- confirm against the callers.

    Args:
      exception: The exception raised by the failed call.

    Returns:
      True if the call should be retried; False when the exception is a
      util.TimeoutError (unless it is an ApiException with one of the codes
      above, which is always retried).
    """
    if isinstance(exception, rest.ApiException):
        code = _get_api_exception_code(exception)
        logging.info("ApiException code=%s", code)

        # UNAUTHORIZED and FORBIDDEN errors can be an indication we need to
        # refresh credentials.
        if code in (UNAUTHORIZED, FORBIDDEN, GATEWAY_TIMEOUT):
            # Due to https://github.com/kubernetes-client/python-base/issues/59,
            # we need to reload the kube config (which refreshes the GCP token).
            # TODO(richardsliu): Remove this workaround when the k8s client issue
            # is resolved.
            util.load_kube_config()
            return True
    return not isinstance(exception, util.TimeoutError)


Expand Down

0 comments on commit 3e7a1ee

Please sign in to comment.