Commit 0c44c3c
Adding garbage removal for cloud uploads
1 parent 596af28 · commit 0c44c3c
Showing 1 changed file with 198 additions and 0 deletions.
@@ -0,0 +1,198 @@
#!/usr/bin/python3 -u

# This script parses a policy.yaml file, which outlines the specific
# pruning actions required for each stream and the age threshold for
# deleting artifacts within them.
# Example of policy.yaml (durations use the "N years"/"N months" format
# that the parser below expects):
# rawhide:
#     # all cloud images
#     cloud-uploads: 2 years
#     # artifacts in meta.json's `images` key
#     images: 2 years
#     images-keep: [qemu, live-iso]
#     build: 3 years
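# Loaded with yaml.safe_load, the example above becomes a nested dict along
# these lines (a sketch, assuming the "N years" duration format):
#   {"rawhide": {"cloud-uploads": "2 years", "images": "2 years",
#                "images-keep": ["qemu", "live-iso"], "build": "3 years"}}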

import argparse
import subprocess
import json
import yaml
import collections
import datetime
import os
from dateutil.relativedelta import relativedelta
import requests

# from cosalib.aliyun import remove_aliyun_image
# from cosalib.gcp import remove_gcp_image
# from cosalib.prune import fetch_build_meta, delete_build
from cosalib.s3 import S3
from cosalib.aws import deregister_ami, delete_snapshot

Build = collections.namedtuple("Build", ["id", "timestamp", "images", "arch"])

def main():
    parser = argparse.ArgumentParser(prog="coreos-assembler remote-prune")
    parser.add_argument("--policy", default='./policy.yaml', type=str,
                        help="Path to policy.yaml file")
    parser.add_argument("--dry-run", dest='dry_run', help="Don't actually delete anything",
                        action='store_true')
    parser.add_argument("--stream", dest="stream", type=str, help="Fedora stream", required=True)

    parser.add_argument("--gcp-json-key", help="GCP Service Account JSON Auth",
                        default=os.environ.get("GCP_JSON_AUTH"))
    parser.add_argument("--gcp-project", help="GCP Project name",
                        default=os.environ.get("GCP_PROJECT_NAME"))
    parser.add_argument("--aws-credentials", dest="aws_credentials", help="AWS Credentials",
                        default=os.environ.get("AWS_CONFIG_FILE"), type=str)

    # subparsers = parser.add_subparsers(dest='cmd', title='subcommands')
    # subparsers.required = True

    # s3 = subparsers.add_parser('s3', help='Prune s3 buckets')
    # s3.add_argument("--bucket", help="Bucket name")
    # s3.add_argument("--prefix", help="Key prefix")
    # s3.add_argument("--force", help="Wipe s3 key ignoring the errors",
    #                 action='store_true')

    args = parser.parse_args()
    if args.aws_credentials:
        os.environ["AWS_CONFIG_FILE"] = args.aws_credentials
    cloud_config = {
        'gcp': {
            'json-key': args.gcp_json_key,
            'project': args.gcp_project,
        }
    }
    with open(args.policy) as f:
        policy = yaml.safe_load(f)
    stream = args.stream

    # These lists are up to date as of schema hash
    # 4c19aed3b3d84af278780bff63728510bb3e70613e4c4eef8cabd7939eb31bd8. If changing
    # this hash, ensure that the list of supported and unsupported artifacts below
    # is up to date.
    supported = ["amis"]
    unsupported = ["base-oscontainer", "aliyun", "azure", "azurestack", "digitalocean",
                   "exoscale", "gcp", "kubevirt", "hyperv", "ibmcloud"]

    if stream in policy:
        # If the build key is set in the policy file, then the cloud-uploads key must
        # also be present, and the duration of cloud-uploads must be equal or shorter
        if "build" in policy[stream].keys():
            cloudUploadsCheck(policy[stream])
        BASE_URL = "https://builds.coreos.fedoraproject.org/prod/streams/" + stream
        buildJsonData = getBuildsForStream(stream)
        # "action" is whatever needs to be pruned for the respective stream
        for action in policy[stream]:
            duration = policy[stream][action]
            if duration.split(" ")[1] in ("years", "year"):
                refDate = datetime.datetime.now() - relativedelta(years=int(duration.split(" ")[0]))
            elif duration.split(" ")[1] in ("months", "month"):
                refDate = datetime.datetime.now() - relativedelta(months=int(duration.split(" ")[0]))
            else:
                print("Pruning durations are only supported in years or months")
                break
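            # For example (a sketch): a duration of "2 years" yields
            # refDate = datetime.datetime.now() - relativedelta(years=2),
            # and builds dated before refDate become pruning candidates.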
            # print(f"Reference Date for action {action} is {refDate}")

            print(f"Pruning {duration} old {action} for {stream} builds")
            # Enumerating in reverse to go from the oldest build to the newest one
            for index, build in reversed(list(enumerate(buildJsonData["builds"]))):
                build_id = build["id"]
                if "policy-cleanup" in build.keys():
                    # If we have already pruned the specified resources for this
                    # build as per builds.json, skip it
                    if action in build["policy-cleanup"]:
                        print(f"The {build_id} has already had {action} pruning completed")
                        continue
                timestamp = build_id.split('.')[1]
                buildDate = datetime.datetime(int(timestamp[0:4]), int(timestamp[4:6]), int(timestamp[-2:]))
                if buildDate < refDate:
                    for arch in build["arches"]:
                        meta_url = f"{BASE_URL}/builds/{build_id}/{arch}/meta.json"
                        meta_json = fetch_json(meta_url)
                        images = {
                            "amis": meta_json.get("amis") or [],
                            "gcp": meta_json.get("gcp") or [],
                        }
                        currentBuild = Build(
                            id=build_id,
                            timestamp=timestamp,
                            images=images,
                            arch=arch,
                        )
                        match action:
                            case "cloud-uploads":
                                # Prunes only AWS at the moment
                                delete_cloud_resources(currentBuild, cloud_config, args.dry_run)
                                if not args.dry_run:
                                    build.setdefault("policy-cleanup", []).append(action)
                                    buildJsonData["builds"][index] = build
                            case "build":
                                # NOTE: --bucket and --prefix are only defined by the
                                # commented-out s3 subparser above, so this branch is
                                # not functional until those arguments are restored
                                print(f"Deleting key {args.prefix}{build_id} from bucket {args.bucket}")
                                # Delete the build's directory in S3
                                S3().delete_object(args.bucket, f"{args.prefix}{build_id}")
        # pprint.pprint(buildJsonData['builds'], compact=True)
        with open("builds/builds.json", "w") as json_file:
            json_file.write(json.dumps(buildJsonData))
    else:
        print(f"No pruning policy specified for {stream} stream in policy.yaml")

# Handling just AWS amis/snapshots at the moment
def delete_cloud_resources(build, cloud_config, dry_run):
    errors = []
    totalBuildAMIs = len(build.images.get("amis") or [])
    # Deregister AMIs and delete their snapshots
    if not build.images.get("amis", []):
        print(f"No AMIs for {build.id} for {build.arch}")
    if dry_run:
        print(f"Would delete {totalBuildAMIs} amis/snapshots for {build.id} for {build.arch}")
        return

    print(f"Deleting cloud uploads for {build.id}")
    for ami in build.images.get("amis", []):
        region_name = ami.get("name")
        ami_id = ami.get("hvm")
        snapshot_id = ami.get("snapshot")
        if ami_id and region_name:
            try:
                deregister_ami(ami_id, region=region_name, dry_run=dry_run)
            except Exception as e:
                errors.append(e)
        if snapshot_id and region_name:
            try:
                delete_snapshot(snapshot_id, region=region_name, dry_run=dry_run)
            except Exception as e:
                errors.append(e)

    if len(errors) != 0:
        print(f"Found errors when removing build {build.id}:")
        for e in errors:
            print(e)
        raise Exception(f"Failed to prune cloud resources for build {build.id}")

def cloudUploadsCheck(actions):
    if "cloud-uploads" in actions.keys():
        cloud_uploads_duration = actions["cloud-uploads"]
        build_duration = actions["build"]
        # Assumes both durations are specified in the same unit (e.g. years)
        assert int(cloud_uploads_duration.split(" ")[0]) <= int(build_duration.split(" ")[0])
    else:
        print("cloud-uploads must be set and be equal to or less than the build pruning duration in policy.yaml")

def getBuildsForStream(stream):
    # Fetch the stream's builds.json via cosa buildfetch
    subprocess.check_output(["cosa", "buildfetch", f"--stream={stream}", "--arch=all"])

    with open("builds/builds.json") as f:
        buildJsonData = json.load(f)
    return buildJsonData
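
# Sketch of the builds.json layout this script relies on (fields inferred from
# the usage in main(); the real file may carry additional keys):
#   {"builds": [{"id": "41.20240101.91.0",
#                "arches": ["x86_64", "aarch64"],
#                "policy-cleanup": ["cloud-uploads"]},
#               ...]}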

# def fetchMetadata(stream, build, arch):
#     buildFetchCmd = 'cosa buildfetch --stream=' + stream + ' --build=' + build + ' --arch=' + arch
#     subprocess.check_output(['/bin/bash', '-i', '-c', buildFetchCmd])

def fetch_json(url):
    response = requests.get(url)
    response.raise_for_status()
    return response.json()

if __name__ == "__main__":
    main()
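
For reference, a dry-run invocation of the script (script path assumed for
illustration) might look like:

    ./remote-prune --policy ./policy.yaml --stream rawhide --dry-run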