From 32c85437dc7580519b204e3297471d60561c448c Mon Sep 17 00:00:00 2001
From: gursewak1997
Date: Thu, 9 May 2024 03:03:50 -0700
Subject: [PATCH] Add garbage removal for cloud uploads

---
 src/remote_prune | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 189 insertions(+)
 create mode 100644 src/remote_prune

diff --git a/src/remote_prune b/src/remote_prune
new file mode 100644
index 0000000000..b64f03fec5
--- /dev/null
+++ b/src/remote_prune
@@ -0,0 +1,189 @@
+#!/usr/bin/python3 -u
+
+# This script parses a policy.yaml file, which outlines the specific
+# pruning actions required for each stream and the age threshold for
+# deleting artifacts within them.
+# Example of policy.yaml:
+# rawhide:
+#     # all cloud images
+#     cloud-uploads: 2y
+#     # artifacts in meta.json's `images` key
+#     images: 2y
+#     images-keep: [qemu, live-iso]
+#     build: 3y
+
+import argparse
+import collections
+import datetime
+import json
+import os
+import subprocess
+import yaml
+from dateutil.relativedelta import relativedelta
+
+# from cosalib.aliyun import remove_aliyun_image
+# from cosalib.gcp import remove_gcp_image
+# from cosalib.prune import fetch_build_meta, delete_build
+from cosalib.s3 import S3
+from cosalib.aws import deregister_ami, delete_snapshot
+
+Build = collections.namedtuple("Build", ["id", "timestamp", "images", "arch"])
+
+
+def main():
+    parser = argparse.ArgumentParser(prog="coreos-assembler remote-prune")
+    parser.add_argument("--policy", default='./policy.yaml', type=str,
+                        help="Path to policy.yaml file")
+    parser.add_argument("--dry-run", dest='dry_run', help="Don't actually delete anything",
+                        action='store_true')
+    parser.add_argument("--stream", dest="stream", type=str, help="Fedora stream", required=True)
+
+    parser.add_argument("--gcp-json-key", help="GCP Service Account JSON Auth",
+                        default=os.environ.get("GCP_JSON_AUTH"))
+    parser.add_argument("--gcp-project", help="GCP Project name",
+                        default=os.environ.get("GCP_PROJECT_NAME"))
+    parser.add_argument("--aws-credentials", dest="aws_credentials", help="AWS Credentials",
+                        default=os.environ.get("AWS_CONFIG_FILE"), type=str)
+
+    subparsers = parser.add_subparsers(dest='cmd', title='subcommands')
+    subparsers.required = True
+
+    s3 = subparsers.add_parser('s3', help='Prune s3 buckets')
+    s3.add_argument("--bucket", help="Bucket name")
+    s3.add_argument("--prefix", help="Key prefix")
+    s3.add_argument("--force", help="Wipe the s3 key, ignoring errors",
+                    action='store_true')
+
+    args = parser.parse_args()
+
+    cloud_config = {
+        'gcp': {
+            'json-key': args.gcp_json_key,
+            'project': args.gcp_project,
+        }
+    }
+    with open(args.policy) as f:
+        policy = yaml.safe_load(f)
+    stream = args.stream
+
+    if stream in policy:
+        # If the build key is set in the policy file, then the cloud-uploads key
+        # must also be present, and the duration of cloud-uploads must be equal
+        # to or shorter than the build duration.
+        if "build" in policy[stream]:
+            cloudUploadsCheck(policy[stream])
+        buildJsonData = getBuildsForStream(stream)
+        # action is whatever needs to be pruned for the respective stream
+        for action in policy[stream]:
+            if action == "images-keep":
+                # images-keep only modifies the images action; it is not a
+                # pruning action itself
+                continue
+            years = policy[stream][action]
+            # Ref date: artifacts older than this date will be deleted
+            refDate = datetime.datetime.now() - relativedelta(years=int(years[:-1]))
+
+            for index, build in enumerate(buildJsonData["builds"]):
+                build_id = build["id"]
+                if "policy-cleanup" in build:
+                    # If we have already pruned the specified resources for this
+                    # build as per builds.json, we skip it.
+                    if action in build["policy-cleanup"]:
+                        print(f"Build {build_id} has already had {action} pruning completed")
+                        continue
+                timestamp = build_id.split('.')[1]
+                buildDate = datetime.datetime.strptime(timestamp, "%Y%m%d")
+                if buildDate < refDate:
+                    for arch in build["arches"]:
+                        print(f"Pruning {years} old {action} for {build_id} for {arch} in {stream}")
+                        buildFetch(args.stream, build_id, arch)
+                        with open(f"builds/{build_id}/{arch}/meta.json") as metaJson:
+                            metaData = json.load(metaJson)
+
+                        images = {
+                            "amis": metaData.get("amis") or [],
+                            "gcp": metaData.get("gcp") or [],
+                        }
+                        currentBuild = Build(
+                            id=build_id,
+                            timestamp=timestamp,
+                            images=images,
+                            arch=arch,
+                        )
+                        match action:
+                            case "cloud-uploads":
+                                # Prune cloud resources (AWS only for now; GCP
+                                # images are collected but not yet pruned)
+                                delete_cloud_resources(currentBuild, cloud_config, args.dry_run)
+                                if not args.dry_run:
+                                    # Mark the action done once per build, not once per arch
+                                    cleanup = build.setdefault("policy-cleanup", [])
+                                    if action not in cleanup:
+                                        cleanup.append(action)
+                                    buildJsonData["builds"][index] = build
+                            case "build":
+                                print(f"Deleting key {args.prefix}{currentBuild.id} from bucket {args.bucket}")
+                                # Delete the build's directory in S3
+                                S3().delete_object(args.bucket, f"{args.prefix}{currentBuild.id}")
+        with open("builds/builds.json", "w") as json_file:
+            json.dump(buildJsonData, json_file)
+    else:
+        print(f"No pruning policy specified for {stream} stream in policy.yaml")
+
+
+# Handling just AWS at the moment for testing
+def delete_cloud_resources(build, cloud_config, dry_run):
+    print(f"Deleting cloud uploads for {build.id}")
+    errors = []
+    # Deregister AMIs and delete the underlying snapshots
+    if not build.images.get("amis", []):
+        print(f"No AMIs for {build.id} for {build.arch}")
+    for ami in build.images.get("amis", []):
+        region_name = ami.get("name")
+        ami_id = ami.get("hvm")
+        snapshot_id = ami.get("snapshot")
+        if dry_run:
+            print(f"Would delete {ami_id} and {snapshot_id}")
+        else:
+            if ami_id and region_name:
+                try:
+                    deregister_ami(ami_id, region=region_name, dry_run=dry_run)
+                except Exception as e:
+                    errors.append(e)
+            if snapshot_id and region_name:
+                try:
+                    delete_snapshot(snapshot_id, region=region_name, dry_run=dry_run)
+                except Exception as e:
+                    errors.append(e)
+
+    if len(errors) != 0:
+        print(f"Found errors when removing build {build.id}:")
+        for e in errors:
+            print(e)
+        raise Exception("Failed to prune cloud uploads")
+
+
+def cloudUploadsCheck(actions):
+    if "cloud-uploads" in actions:
+        cloud_uploads_duration = actions["cloud-uploads"]
+        build_duration = actions["build"]
+        # Assumption: durations are specified in years, e.g. "2y"
+        assert int(cloud_uploads_duration[:-1]) <= int(build_duration[:-1])
+    else:
+        raise Exception("cloud-uploads must be set and be no longer than the build pruning duration in policy.yaml")
+
+
+def getBuildsForStream(stream):
+    buildFetchCmd = f'cosa buildfetch --stream={stream} --arch=all'
+    subprocess.check_output(['/bin/bash', '-i', '-c', buildFetchCmd])
+
+    with open("builds/builds.json") as f:
+        buildJsonData = json.load(f)
+    return buildJsonData
+
+
+def buildFetch(stream, build, arch):
+    buildFetchCmd = f'cosa buildfetch --stream={stream} --build={build} --arch={arch}'
+    subprocess.check_output(['/bin/bash', '-i', '-c', buildFetchCmd])
+
+
+if __name__ == "__main__":
+    main()
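
For reviewers, a sample invocation of the new script. This is illustrative
only: the bucket and prefix values below are placeholders, and it assumes the
script runs inside a coreos-assembler working directory (it writes to
builds/). Only the flags actually defined in this patch are used:

    ./remote_prune --policy ./policy.yaml --stream rawhide \
        --aws-credentials "${AWS_CONFIG_FILE}" \
        s3 --bucket my-fcos-bucket --prefix prod/streams/rawhide/builds/

Adding --dry-run before the s3 subcommand makes the script only print the
AMIs and snapshots it would delete, without touching builds.json state.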