Adding garbage removal for cloud uploads
gursewak1997 committed Jun 13, 2024
1 parent 596af28 commit 0c44c3c
Showing 1 changed file with 198 additions and 0 deletions.
198 changes: 198 additions & 0 deletions src/remote_prune
@@ -0,0 +1,198 @@
#!/usr/bin/python3 -u

# This script parses a policy.yaml file, which outlines the specific
# pruning actions required for each stream and the age threshold for
# deleting artifacts within them.
# Example of policy.yaml (durations are given as "<number> years" or "<number> months"):
# rawhide:
#   # all cloud images
#   cloud-uploads: 2 years
#   # artifacts in meta.json's `images` key
#   images: 2 years
#   images-keep: [qemu, live-iso]
#   build: 3 years
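#
# A hypothetical invocation (the policy path and stream name below are examples only):
#   ./src/remote_prune --policy ./policy.yaml --stream rawhide --dry-run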

import argparse
import subprocess
import json
import yaml
import collections
import datetime
import os
from dateutil.relativedelta import relativedelta
import requests

# from cosalib.aliyun import remove_aliyun_image
# from cosalib.gcp import remove_gcp_image
# from cosalib.prune import fetch_build_meta, delete_build
from cosalib.s3 import S3
from cosalib.aws import deregister_ami, delete_snapshot

Build = collections.namedtuple("Build", ["id", "timestamp", "images", "arch"])
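# A Build bundles the build ID, the datestamp taken from that ID, the cloud image
# metadata pulled from meta.json, and the architecture the build targets.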

def main():
parser = argparse.ArgumentParser(prog="coreos-assembler remote-prune")
parser.add_argument("--policy", default='./policy.yaml', type=str,
help="Path to policy.yaml file", required=True)
parser.add_argument("--dry-run", dest='dry_run', help="Don't actually delete anything",
action='store_true')
parser.add_argument("--stream", dest="stream", type=str, help="Fedora stream", required=True)

parser.add_argument("--gcp-json-key", help="GCP Service Account JSON Auth",
default=os.environ.get("GCP_JSON_AUTH"))
parser.add_argument("--gcp-project", help="GCP Project name",
default=os.environ.get("GCP_PROJECT_NAME"))
parser.add_argument("--aws-credentials", dest="aws_credentials", help="AWS Credentials",
default=os.environ.get("AWS_CONFIG_FILE"), type=str)

# Arguments needed by the `build` pruning action (deletes the build directory from S3)
parser.add_argument("--bucket", help="Bucket name")
parser.add_argument("--prefix", help="Key prefix", default="")

args = parser.parse_args()
if args.aws_credentials:
os.environ["AWS_CONFIG_FILE"] = args.aws_credentials
cloud_config = {
'gcp': {
'json-key': args.gcp_json_key,
'project': args.gcp_project,
}
}
with open(args.policy) as policy_file:
    policy = yaml.safe_load(policy_file)
stream = args.stream

# These lists are up to date as of schema hash
# 4c19aed3b3d84af278780bff63728510bb3e70613e4c4eef8cabd7939eb31bd8. If changing
# this hash, ensure that the list of supported and unsupported artifacts below
# is up to date.
supported = ["amis"]
unsupported = ["base-oscontainer", "aliyun", "azure", "azurestack", "digitalocean", "exoscale", "gcp", "kubevirt", "hyperv", "ibmcloud"]
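# Of these meta.json artifact types, only the ones in `supported` (currently AWS AMIs)
# are pruned by delete_cloud_resources() below; `unsupported` tracks what is not yet handled.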

if stream in policy:
# If the build key is set in the policy file, then the cloud-uploads key must
# also be present, and its duration must be equal to or shorter than the build duration
if "build" in policy[stream].keys():
cloudUploadsCheck(policy[stream])
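# For example, cloud-uploads: 2 years alongside build: 3 years satisfies the check;
# a cloud-uploads duration longer than the build duration trips the assertion.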
BASE_URL = "https://builds.coreos.fedoraproject.org/prod/streams/" + stream
buildJsonData = getBuildsForStream(stream)
# Each action names a type of artifact to prune for this stream (e.g. cloud-uploads, build)
for action in policy[stream]:
duration = policy[stream][action]
# Durations are expected in the form "<number> years" or "<number> months"
duration_value, duration_unit = duration.split(" ")
if duration_unit in ("years", "year"):
refDate = datetime.datetime.now() - relativedelta(years=int(duration_value))
elif duration_unit in ("months", "month"):
refDate = datetime.datetime.now() - relativedelta(months=int(duration_value))
else:
print(f"Pruning duration for {action} is only supported in years or months")
break
# print(f"Reference Date for action {action} is {refDate}")

print(f"Pruning {action} older than {duration} for {stream} builds")
# Enumerating in reverse to go from the oldest build to the newest one
for index, build in reversed(list(enumerate(buildJsonData["builds"]))):
build_id = build["id"]
if "policy-cleanup" in build.keys():
# If we have already pruned the specified resources for this
# build as per builds.json, skip it and move on to the next one.
if action in build["policy-cleanup"]:
print(f"The {build_id} build has already had {action} pruning completed")
continue
# The second dot-separated field of the build ID carries the build date as YYYYMMDD
timestamp = build_id.split('.')[1]
buildDate = datetime.datetime(int(timestamp[0:4]), int(timestamp[4:6]), int(timestamp[-2:]))
if buildDate < refDate:
for arch in build["arches"]:
meta_url = f"{BASE_URL}/builds/{build_id}/{arch}/meta.json"
meta_json = fetch_json(meta_url)
images = {
"amis": meta_json.get("amis") or [],
"gcp": meta_json.get("gcp") or [],
}
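# GCP image metadata is collected here as well, but only the AWS entries are pruned
# for now (see the "Prunes only AWS at the moment" note below).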
currentBuild = Build(
id=build_id,
timestamp=timestamp,
images=images,
arch=arch,
)
match action:
case "cloud-uploads":
# Prunes only AWS at the moment
delete_cloud_resources(currentBuild, cloud_config, args.dry_run)
if not args.dry_run:
build.setdefault("policy-cleanup", []).append(action)
buildJsonData["builds"][index] = build
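# The build entry now carries e.g. "policy-cleanup": ["cloud-uploads"], letting later
# runs skip this build for the same action.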
case "build":
print(f"Deleting key {args.prefix}{currentBuild.id} from bucket {args.bucket}")
# Delete the build's directory in S3
S3().delete_object(args.bucket, f"{args.prefix}{str(currentBuild.id)}")
# pprint.pprint(buildJsonData['builds'], compact=True)
with open("builds/builds.json", "w") as json_file:
json_file.write(json.dumps(buildJsonData))
else:
print(f"No pruning policy specified for {stream} stream in policy.yaml")

# Handling just AWS amis/snapshots at the moment
def delete_cloud_resources(build, cloud_config, dry_run):
errors = []
totalBuildAMIs = len(build.images.get("amis") or '')
# Unregister AMIs and snapshots
if not build.images.get("amis", []):
print(f"No AMIs for {build.id} for {build.arch}")
if dry_run:
print(f"Would delete {totalBuildAMIs} amis/snapshots for {build.id} for {build.arch}")
return

for ami in build.images.get("amis", []):
print(f"Deleting cloud uploads for {build.id}")
region_name = ami.get("name")
ami_id = ami.get("hvm")
snapshot_id = ami.get("snapshot")
if ami_id and region_name:
try:
deregister_ami(ami_id, region=region_name, dry_run=dry_run)
except Exception as e:
errors.append(e)
if snapshot_id and region_name:
try:
delete_snapshot(snapshot_id, region=region_name, dry_run=dry_run)
except Exception as e:
errors.append(e)

if len(errors) != 0:
print(f"Found errors when removing build {build.id}:")
for e in errors:
print(e)
raise Exception("Errors were encountered while removing cloud resources")

def cloudUploadsCheck(actions):
if "cloud-uploads" in actions.keys():
cloud_uploads_duration = actions["cloud-uploads"]
build_duration = actions["build"]
# assumption we are keeping the duration in years
assert cloud_uploads_duration < build_duration
else:
print("cloud-uploads must be set and its duration must be shorter than the build pruning duration in policy.yaml")

def getBuildsForStream(stream):
buildFetchCmd = 'cosa buildfetch --stream='+ stream + ' --arch=all'
subprocess.check_output(['/bin/bash', '-i', '-c', buildFetchCmd])

with open("builds/builds.json") as f:
    buildJsonData = json.load(f)
return buildJsonData

# def fetchMetadata(stream, build, arch):
# buildFetchCmd = 'cosa buildfetch --stream=' + stream + ' --build=' + build + ' --arch=' + arch
# subprocess.check_output(['/bin/bash', '-i', '-c', buildFetchCmd])

def fetch_json(url):
response = requests.get(url)
response.raise_for_status()
return response.json()

if __name__ == "__main__":
main()
