From 387f725620bf40c2eadbd6428c37d0c518968087 Mon Sep 17 00:00:00 2001 From: Zhenghui Wang Date: Tue, 7 May 2019 16:16:34 -0700 Subject: [PATCH] GC backend services in kubeflow-ci-deployment (#382) * GC backend services in ci-deployment * update libsonnet --- py/kubeflow/testing/cleanup_ci.py | 57 +++++++++++++++++++ .../ks_app/components/cleanup-ci-cron.jsonnet | 2 +- .../ks_app/components/cleanup-ci.libsonnet | 27 ++++++----- 3 files changed, 72 insertions(+), 14 deletions(-) diff --git a/py/kubeflow/testing/cleanup_ci.py b/py/kubeflow/testing/cleanup_ci.py index d6362feaa81..088095dd018 100644 --- a/py/kubeflow/testing/cleanup_ci.py +++ b/py/kubeflow/testing/cleanup_ci.py @@ -253,6 +253,50 @@ def cleanup_firewall_rules(args): logging.info("Unexpired firewall rules:\n%s", "\n".join(unexpired)) logging.info("expired firewall rules:\n%s", "\n".join(expired)) +def cleanup_backend_services(args): + """Delete backend services older than the configured maximum age. + + Does nothing unless args.gc_backend_services is set; when args.dryrun is + set, expired services are logged but not deleted. + """ + if not args.gc_backend_services: + return + + credentials = GoogleCredentials.get_application_default() + compute = discovery.build('compute', 'v1', credentials=credentials) + backends = compute.backendServices() + next_page_token = None + expired = [] + unexpired = [] + + while True: + results = backends.list(project=args.project, + pageToken=next_page_token).execute() + if "items" not in results: + break + for s in results["items"]: + name = s["name"] + age = getAge(s["creationTimestamp"]) + if age > datetime.timedelta( + hours=args.max_ci_deployment_resource_age_hours): + logging.info("Deleting backend services: %s, age = %r", name, age) + if not args.dryrun: + # delete() only builds the request; execute() is what sends it. + response = backends.delete(project=args.project, + backendService=name).execute() + logging.info("response = %s", response) + expired.append(name) + else: + unexpired.append(name) + + if "nextPageToken" not in results: + break + next_page_token = results["nextPageToken"] + + logging.info("Unexpired backend services:\n%s", "\n".join(unexpired)) + logging.info("expired backend services:\n%s", "\n".join(expired)) + + def 
cleanup_health_checks(args): credentials = GoogleCredentials.get_application_default() @@ -566,6 +610,7 @@ def cleanup_all(args): cleanup_service_account_bindings, cleanup_workflows, cleanup_disks, + cleanup_backend_services, cleanup_firewall_rules, cleanup_health_checks] for op in ops: @@ -624,6 +669,18 @@ def main(): parser.add_argument( "--max_age_hours", default=3, type=int, help=("The age of deployments to gc.")) + parser.add_argument( + "--gc_backend_services", default=False, + # type=bool would treat any non-empty string, even "false", as True. + type=lambda v: v.lower() in ("1", "true", "yes"), + help=("""Whether to GC backend services that are older + than --max_ci_deployment_resource_age_hours.""")) + + parser.add_argument( + "--max_ci_deployment_resource_age_hours", + default=24, type=int, + help=("The age of resources in kubeflow-ci-deployment to gc.")) + parser.add_argument( "--max_wf_age_hours", default=7*24, type=int, help=("How long to wait before garbage collecting Argo workflows.")) diff --git a/test-infra/ks_app/components/cleanup-ci-cron.jsonnet b/test-infra/ks_app/components/cleanup-ci-cron.jsonnet index a3c6a75143c..4c7e2256849 100644 --- a/test-infra/ks_app/components/cleanup-ci-cron.jsonnet +++ b/test-infra/ks_app/components/cleanup-ci-cron.jsonnet @@ -35,5 +35,5 @@ local job(project) = { std.prune(k.core.v1.list.new([ // Setup 2 cron jobs for the two projects. 
job("kubeflow-ci"), - job("kubeflow-ci-deployment"), + job("kubeflow-ci-deployment", true), ])) diff --git a/test-infra/ks_app/components/cleanup-ci.libsonnet b/test-infra/ks_app/components/cleanup-ci.libsonnet index 3fdf8a428bd..93b03a0be27 100644 --- a/test-infra/ks_app/components/cleanup-ci.libsonnet +++ b/test-infra/ks_app/components/cleanup-ci.libsonnet @@ -16,7 +16,7 @@ ) )], - jobSpec:: function(project="kubeflow-ci"){ + jobSpec:: function(project="kubeflow-ci", gcBackendServices=false){ "template": { "spec": { "containers": [ @@ -24,7 +24,7 @@ command: $.buildCommand([[ "/usr/local/bin/checkout.sh", "/src", - ], + ], [ "python", "-m", @@ -32,33 +32,34 @@ "--project=" + project, "all", "--delete_script=/src/kubeflow/kubeflow/scripts/gke/delete_deployment.sh", + "--gc_backend_services=" + gcBackendServices, ], - ]), - "image": "gcr.io/kubeflow-ci/test-worker:v20190415-53ad3b5-dirty-5bc1cf", + ]), + "image": "gcr.io/kubeflow-ci/test-worker:v20190415-53ad3b5-dirty-5bc1cf", "name": "label-sync", env: [ { name: "REPO_OWNER", - value: "kubeflow", + value: "kubeflow", }, { name: "REPO_NAME", - value: "testing", + value: "testing", }, { name: "PYTHONPATH", value: "/src/kubeflow/testing/py", }, { - name: "EXTRA_REPOS", + name: "EXTRA_REPOS", value: "kubeflow/kubeflow@HEAD", }, { name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/key.json", - }, + }, ], - "volumeMounts": [ + "volumeMounts": [ { name: "gcp-credentials", mountPath: "/secret/gcp-credentials", @@ -66,17 +67,17 @@ }, ] } - ], - "restartPolicy": "Never", + ], + "restartPolicy": "Never", "volumes": [ { name: "gcp-credentials", secret: { secretName: "kubeflow-testing-credentials", }, - }, + }, ] } } }, -} \ No newline at end of file +}