From 847d6de9ddaa0a8b68949d0b575729d8ae541f66 Mon Sep 17 00:00:00 2001 From: "Sungjun.Kim" Date: Fri, 22 Mar 2019 08:02:02 +0900 Subject: [PATCH] Try to fix CI (#649) * Add a timer to multi_tenancy_test * Fix the following YAML warning ``` YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details. parsed = yaml.load(rendered) ``` * Add a timer to kubernetes_namespace * Check redis's status after deploying it --- .../docker/docker_metric_utils.py | 4 +- .../kubernetes_container_manager.py | 48 ++++++++++++++----- .../kubernetes/redis-deployment.yaml | 2 +- .../kubernetes/redis-service.yaml | 2 +- containers/python/rpc.py | 2 +- integration-tests/clipper_metric_docker.py | 2 +- integration-tests/clipper_metric_kube.py | 2 +- integration-tests/kubernetes_namespace.py | 2 + integration-tests/multi_tenancy_test.py | 3 ++ 9 files changed, 49 insertions(+), 18 deletions(-) diff --git a/clipper_admin/clipper_admin/docker/docker_metric_utils.py b/clipper_admin/clipper_admin/docker/docker_metric_utils.py index e528edb44..e868f7233 100644 --- a/clipper_admin/clipper_admin/docker/docker_metric_utils.py +++ b/clipper_admin/clipper_admin/docker/docker_metric_utils.py @@ -121,7 +121,7 @@ def add_to_metric_config(model_container_name, prom_config_path, :py:exc:`clipper.ClipperException` """ with open(prom_config_path, 'r') as f: - conf = yaml.load(f) + conf = yaml.load(f, Loader=yaml.FullLoader) for config in conf['scrape_configs']: if config['job_name'] == model_container_name: @@ -157,7 +157,7 @@ def delete_from_metric_config(model_container_name, prom_config_path, :return: None """ with open(prom_config_path, 'r') as f: - conf = yaml.load(f) + conf = yaml.load(f, Loader=yaml.FullLoader) for i, config in enumerate(conf['scrape_configs']): if config['job_name'] == model_container_name: diff --git a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py index e0777082f..48b909957 100644 --- a/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py +++ b/clipper_admin/clipper_admin/kubernetes/kubernetes_container_manager.py @@ -189,24 +189,39 @@ def start_clipper(self, def _start_redis(self, sleep_time=5): # If an existing Redis service isn't provided, start one if self.redis_ip is None: + deployment_name = 'redis-at-{cluster_name}'.format( + cluster_name=self.cluster_name) + with _pass_conflicts(): self._k8s_beta.create_namespaced_deployment( body=self._generate_config( CONFIG_FILES['redis']['deployment'], + deployment_name=deployment_name, cluster_name=self.cluster_name), namespace=self.k8s_namespace) with _pass_conflicts(): body = self._generate_config( CONFIG_FILES['redis']['service'], + deployment_name=deployment_name, public_redis_port=self.redis_port, cluster_name=self.cluster_name) self._k8s_v1.create_namespaced_service( body=body, namespace=self.k8s_namespace) time.sleep(sleep_time) - self.redis_ip = 'redis-at-{cluster_name}'.format( - cluster_name=self.cluster_name) + # Wait for max 10 minutes + wait_count = 0 + while self._k8s_beta.read_namespaced_deployment( + name=deployment_name, + namespace=self.k8s_namespace).status.available_replicas != 1: + time.sleep(3) + wait_count += 3 + if wait_count > 600: + raise ClipperException( + "Could not create a Kubernetes deployment: {}".format(deployment_name)) + + self.redis_ip = deployment_name def _start_mgmt(self, mgmt_image): with _pass_conflicts(): @@ -287,7 +302,7 @@ def _start_prometheus(self): def _generate_config(self, file_path, **kwargs): template = self.template_engine.get_template(file_path) rendered = template.render(**kwargs) - parsed = yaml.load(rendered) + parsed = yaml.load(rendered, Loader=yaml.FullLoader) return parsed def connect(self): @@ -299,7 +314,7 @@ def connect(self): if addr.type == "ExternalDNS": external_node_hosts.append(addr.address) - if len(external_node_hosts) == 0 and (self.useInternalIP): + if len(external_node_hosts) == 0 and self.useInternalIP: msg = "No external node addresses found. Using Internal IP address" self.logger.warning(msg) for addr in node.status.addresses: @@ -389,14 +404,21 @@ def deploy_model(self, name, version, input_type, image, num_replicas=1): self._k8s_beta.create_namespaced_deployment( body=generated_body, namespace=self.k8s_namespace) - while self._k8s_beta.read_namespaced_deployment_status( + # Wait for max 10 minutes + wait_count = 0 + while self._k8s_beta.read_namespaced_deployment( name=deployment_name, namespace=self.k8s_namespace).status.available_replicas \ != num_replicas: time.sleep(3) + wait_count += 3 + if wait_count > 600: + raise ClipperException( + "Could not create a Kubernetes deployment. " + "Model: {}-{} Image: {}".format(name, version, image)) def get_num_replicas(self, name, version): deployment_name = get_model_deployment_name( - name, version, query_frontend_id=0) + name, version, query_frontend_id=0, cluster_name=self.cluster_name) response = self._k8s_beta.read_namespaced_deployment_scale( name=deployment_name, namespace=self.k8s_namespace) @@ -406,7 +428,7 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas): # NOTE: assumes `metadata.name` can identify the model deployment. for query_frontend_id in range(self.num_frontend_replicas): deployment_name = get_model_deployment_name( - name, version, query_frontend_id) + name, version, query_frontend_id, self.cluster_name) self._k8s_beta.patch_namespaced_deployment_scale( name=deployment_name, @@ -417,10 +439,17 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas): } }) - while self._k8s_beta.read_namespaced_deployment_status( + # Wait for max 10 minutes + wait_count = 0 + while self._k8s_beta.read_namespaced_deployment( name=deployment_name, namespace=self.k8s_namespace).status.available_replicas \ != num_replicas: time.sleep(3) + wait_count += 3 + if wait_count > 600: + raise ClipperException( + "Could not update scale of the specified Deployment. " + "Model: {}-{} Image: {}".format(name, version, image)) def get_logs(self, logging_dir): logging_dir = os.path.abspath(os.path.expanduser(logging_dir)) @@ -514,9 +543,6 @@ def stop_all(self, graceful=True): logging.warning( "Exception deleting kubernetes resources: {}".format(e)) - def get_registry(self): - return self.registry - def get_admin_addr(self): if self.use_k8s_proxy: return ("{proxy_addr}/api/v1/namespaces/{ns}/" diff --git a/clipper_admin/clipper_admin/kubernetes/redis-deployment.yaml b/clipper_admin/clipper_admin/kubernetes/redis-deployment.yaml index 046d95f56..ee8923a29 100644 --- a/clipper_admin/clipper_admin/kubernetes/redis-deployment.yaml +++ b/clipper_admin/clipper_admin/kubernetes/redis-deployment.yaml @@ -4,7 +4,7 @@ metadata: labels: ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: redis - name: redis-at-{{ cluster_name }} + name: {{ deployment_name }} # Cluster name included spec: replicas: 1 template: diff --git a/clipper_admin/clipper_admin/kubernetes/redis-service.yaml b/clipper_admin/clipper_admin/kubernetes/redis-service.yaml index 11fe8528c..000ab585a 100644 --- a/clipper_admin/clipper_admin/kubernetes/redis-service.yaml +++ b/clipper_admin/clipper_admin/kubernetes/redis-service.yaml @@ -4,7 +4,7 @@ metadata: labels: ai.clipper.container.label: {{ cluster_name }} ai.clipper.name: redis - name: redis-at-{{ cluster_name }} + name: {{ deployment_name }} # Cluster name included spec: type: NodePort ports: diff --git a/containers/python/rpc.py b/containers/python/rpc.py index 4c3a09f14..2e683e21d 100644 --- a/containers/python/rpc.py +++ b/containers/python/rpc.py @@ -733,7 +733,7 @@ def add_metrics(): os.path.split(os.path.realpath(__file__))[0], config_file_path) with open(config_file_path, 'r') as f: - config = yaml.load(f) + config = yaml.load(f, Loader=yaml.FullLoader) config = config['Model Container'] prefix = 'clipper_{}_'.format(config.pop('prefix')) diff --git a/integration-tests/clipper_metric_docker.py b/integration-tests/clipper_metric_docker.py index e55a8c827..8f3647a18 100644 --- a/integration-tests/clipper_metric_docker.py +++ b/integration-tests/clipper_metric_docker.py @@ -34,7 +34,7 @@ def get_metrics_config(): config_path = os.path.join( os.path.abspath("%s/../monitoring" % cur_dir), 'metrics_config.yaml') with open(config_path, 'r') as f: - conf = yaml.load(f) + conf = yaml.load(f, Loader=yaml.FullLoader) return conf diff --git a/integration-tests/clipper_metric_kube.py b/integration-tests/clipper_metric_kube.py index cd582c4f4..ff95f7097 100644 --- a/integration-tests/clipper_metric_kube.py +++ b/integration-tests/clipper_metric_kube.py @@ -118,7 +118,7 @@ def get_metrics_config(): config_path = os.path.join( os.path.abspath("%s/../monitoring" % cur_dir), 'metrics_config.yaml') with open(config_path, 'r') as f: - conf = yaml.load(f) + conf = yaml.load(f, Loader=yaml.FullLoader) return conf diff --git a/integration-tests/kubernetes_namespace.py b/integration-tests/kubernetes_namespace.py index 0a34d9668..8597cc15b 100644 --- a/integration-tests/kubernetes_namespace.py +++ b/integration-tests/kubernetes_namespace.py @@ -24,6 +24,8 @@ def test(): deploy_(conn_1) deploy_(conn_2) + time.sleep(10) + res_1 = predict_(conn_1.get_query_addr(), [.1, .2, .3]) res_2 = predict_(conn_2.get_query_addr(), [.1, .2, .3]) assert not res_1['default'] diff --git a/integration-tests/multi_tenancy_test.py b/integration-tests/multi_tenancy_test.py index a8f7d93f2..c46ad4236 100644 --- a/integration-tests/multi_tenancy_test.py +++ b/integration-tests/multi_tenancy_test.py @@ -11,6 +11,7 @@ from test_utils import create_kubernetes_connection, create_docker_connection, CLIPPER_CONTAINER_REGISTRY from random import randint + def test(kubernetes): conn_1 = create('multi-tenancy-1-{}'.format(randint(1,9999)), use_kubernetes=kubernetes) conn_2 = create('multi-tenancy-2-{}'.format(randint(1,9999)), use_kubernetes=kubernetes) @@ -18,6 +19,8 @@ def test(kubernetes): deploy_(conn_1, use_kubernetes=kubernetes) deploy_(conn_2, use_kubernetes=kubernetes) + time.sleep(10) + res_1 = predict_(conn_1.get_query_addr(), [.1, .2, .3]) res_2 = predict_(conn_2.get_query_addr(), [.1, .2, .3]) assert not res_1['default']