Skip to content

Commit ea78d55

Browse files
committed
cmd-cloud-prune: GC images and whole builds
Extend the garbage collection to cover individual images and whole builds. For each stream, we prune every image except those listed under `images-keep` in gc-policy.yaml. When pruning a whole build, we delete all of that build's resources in S3 and record the build under `tombstone-builds` in the corresponding builds.json.
1 parent d3302e0 commit ea78d55

File tree

1 file changed

+106
-25
lines changed

1 file changed

+106
-25
lines changed

src/cmd-cloud-prune

Lines changed: 106 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@
2222
# "arches": [
2323
# "x86_64"
2424
# ],
25-
# "policy-cleanup": [
26-
# "cloud-uploads",
25+
# "policy-cleanup": {
26+
# "cloud-uploads": true,
27+
# "images": true,
2728
# "images-kept": ["qemu", "live-iso"]
28-
# ]
29+
# }
2930
# }
3031
#
3132
# We should also prune unreferenced build directories here. See also
@@ -40,6 +41,7 @@ import collections
4041
import datetime
4142
import os
4243
import boto3
44+
import botocore
4345
from dateutil.relativedelta import relativedelta
4446
from cosalib.gcp import remove_gcp_image
4547
from cosalib.aws import deregister_aws_resource
@@ -51,6 +53,12 @@ from cosalib.cmdlib import convert_duration_to_days
5153
Build = collections.namedtuple("Build", ["id", "images", "arch", "meta_json"])
5254
# set metadata caching to 5m
5355
CACHE_MAX_AGE_METADATA = 60 * 5
56+
# These lists are up to date as of schema hash
57+
# 4c19aed3b3d84af278780bff63728510bb3e70613e4c4eef8cabd7939eb31bd8. If changing
58+
# this hash, ensure that the list of SUPPORTED and UNSUPPORTED artifacts below
59+
# is up to date.
60+
SUPPORTED = ["amis", "gcp"]
61+
UNSUPPORTED = ["aliyun", "azurestack", "digitalocean", "exoscale", "ibmcloud", "powervs", "azure"]
5462

5563

5664
def parse_args():
@@ -88,13 +96,6 @@ def main():
8896
# This copies the local builds.json and updates the S3 bucket version.
8997
return handle_upload_builds_json(s3_client, bucket, prefix, args.dry_run, args.acl)
9098

91-
# These lists are up to date as of schema hash
92-
# 4c19aed3b3d84af278780bff63728510bb3e70613e4c4eef8cabd7939eb31bd8. If changing
93-
# this hash, ensure that the list of supported and unsupported artifacts below
94-
# is up to date.
95-
supported = ["amis", "gcp"]
96-
unsupported = ["aliyun", "azurestack", "digitalocean", "exoscale", "ibmcloud", "powervs", "azure"]
97-
9899
with open(args.policy, "r") as f:
99100
policy = yaml.safe_load(f)
100101
if stream in policy:
@@ -114,36 +115,72 @@ def main():
114115
continue
115116
duration = convert_duration_to_days(policy[stream][action])
116117
ref_date = today_date - relativedelta(days=int(duration))
118+
pruned_build_ids = []
119+
images_to_keep = policy.get(stream, {}).get("images-keep", [])
117120

118121
print(f"Pruning resources of type {action} older than {policy[stream][action]} ({ref_date.date()}) on stream {stream}")
119122
# Enumerating in reverse to go from the oldest build to the newest one
120123
for build in reversed(builds):
121124
build_id = build["id"]
122-
if action in build.get("policy-cleanup", []):
123-
print(f"Build {build_id} has already had {action} pruning completed")
124-
continue
125125
(build_date, _) = parse_fcos_version_to_timestamp_and_stream(build_id)
126-
127126
if build_date >= ref_date:
128127
break
128+
129+
previous_cleanup = build.get("policy-cleanup", {})
130+
if action in previous_cleanup:
131+
# If we are in here then there has been some previous cleanup of
132+
# this type run for this build. For all types except `images` we
133+
# can just continue.
134+
if action != "images":
135+
print(f"Build {build_id} has already had {action} pruning completed")
136+
continue
137+
else:
138+
# OK `images` has been pruned before, but we need to check
139+
# that all the images were pruned that match the current policy.
140+
# i.e. there may be additional images we need prune
141+
previous_images_kept = previous_cleanup.get("images-kept", [])
142+
if set(images_to_keep) == set(previous_images_kept):
143+
print(f"Build {build_id} has already had {action} pruning completed")
144+
continue
145+
129146
for arch in build["arches"]:
147+
print(f"Pruning {arch} {action} for {build_id}")
130148
meta_prefix = os.path.join(prefix, f"{build_id}/{arch}/meta.json")
131149
meta_json = get_json_from_s3(s3_client, bucket, meta_prefix)
132150
# Make sure the meta.json doesn't contain any cloud_platform that is not supported for pruning yet.
133-
images = get_supported_images(meta_json, unsupported, supported)
151+
images = get_supported_images(meta_json)
134152
current_build = Build(id=build_id, images=images, arch=arch, meta_json=meta_json)
135153

136154
match action:
137155
case "cloud-uploads":
138156
prune_cloud_uploads(current_build, cloud_config, args.dry_run)
139-
case "build":
140-
raise NotImplementedError
141-
# print(f"Deleting key {prefix}{build.id} from bucket {bucket}")
142-
# Delete the build's directory in S3
143-
# S3().delete_object(args.bucket, f"{args.prefix}{str(current_build.id)}")
157+
# Prune through images that are not mentioned in images-keep
144158
case "images":
145-
raise NotImplementedError
146-
build.setdefault("policy-cleanup", []).append("cloud-uploads")
159+
prune_images(s3_client, current_build, images_to_keep, args.dry_run, bucket, prefix)
160+
# Fully prune releases that are very old including deleting the directory in s3 for that build.
161+
case "build":
162+
prune_build(s3_client, bucket, prefix, build_id, args.dry_run)
163+
pruned_build_ids.append(build_id)
164+
# Update policy-cleanup after processing all arches for the build
165+
policy_cleanup = build.setdefault("policy-cleanup", {})
166+
match action:
167+
case "cloud-uploads":
168+
if "cloud-uploads" not in policy_cleanup:
169+
policy_cleanup["cloud-uploads"] = True
170+
case "images":
171+
if "images" not in policy_cleanup:
172+
policy_cleanup["images"] = True
173+
policy_cleanup["images-kept"] = images_to_keep
174+
175+
if pruned_build_ids:
176+
if "tombstone-builds" not in builds_json_data:
177+
builds_json_data["tombstone-builds"] = []
178+
# Separate the builds into remaining builds and tombstone builds
179+
remaining_builds = [build for build in builds if build["id"] not in pruned_build_ids]
180+
tombstone_builds = [build for build in builds if build["id"] in pruned_build_ids]
181+
# Update the data structure
182+
builds_json_data["builds"] = remaining_builds
183+
builds_json_data["tombstone-builds"].extend(tombstone_builds)
147184

148185
# Save the updated builds.json to local builds/builds.json
149186
save_builds_json(builds_json_data, BUILDFILES['list'])
@@ -181,13 +218,15 @@ def validate_policy(stream, policy):
181218
raise Exception("Duration of pruning cloud-uploads must be less than or equal to pruning a build")
182219

183220

184-
def get_supported_images(meta_json):
    """Extract the cloud artifacts from meta.json that we know how to prune.

    Returns {platform: metadata} for every key of meta_json found in the
    module-level SUPPORTED list. Raises if a key appears in UNSUPPORTED, or
    in neither list (the lists are pinned to a schema hash; see where they
    are defined).

    NOTE(review): this iterates *all* top-level meta.json keys, so ordinary
    keys such as "buildid" would also reach the final else and raise —
    confirm SUPPORTED/UNSUPPORTED are meant to enumerate every schema key,
    or that the else is intended as a schema-drift tripwire.
    """
    images = {}
    for key in meta_json:
        if key in UNSUPPORTED:
            raise Exception(f"The platform {key} is not supported")
        if key in SUPPORTED:
            images[key] = meta_json[key]
        else:
            raise Exception(f"The platform {key} is neither in supported nor unsupported artifacts.")
    return images
192231

193232

@@ -320,5 +359,47 @@ def delete_gcp_image(build, cloud_config, dry_run):
320359
return errors
321360

322361

362+
def prune_images(s3, build, images_to_keep, dry_run, bucket, prefix):
    """Delete a build/arch's image files from S3, keeping only images_to_keep.

    s3: boto3 S3 client used for the delete calls
    build: Build namedtuple (id, images, arch, meta_json)
    images_to_keep: image names (e.g. "qemu") that must NOT be pruned
    dry_run: if True, only print what would be deleted
    bucket, prefix: S3 location of the build tree

    Raises an Exception at the end if any delete failed; a NoSuchKey error
    is treated as "already pruned" and ignored.
    """
    # Default to {} (not []) so a meta.json without an "images" section
    # yields an empty iteration instead of crashing on .items().
    images_from_meta_json = build.meta_json.get("images", {})
    # Get the image names and paths currently in meta.json
    current_images_data = [(name, data.get("path")) for name, data in images_from_meta_json.items()]
    errors = []

    for name, path in current_images_data:
        if name not in images_to_keep:
            image_prefix = os.path.join(prefix, f"{build.id}/{build.arch}/{path}")
            if dry_run:
                print(f"Would prune {bucket}/{image_prefix}")
            else:
                try:
                    s3.delete_object(Bucket=bucket, Key=image_prefix)
                    print(f"Pruned {name} image for {build.id} for {build.arch}")
                except botocore.exceptions.ClientError as e:
                    # Object already gone: fine. Anything else: collect the
                    # error and keep going so we prune as much as possible.
                    if e.response['Error']['Code'] == 'NoSuchKey':
                        print(f"{bucket}/{image_prefix} already pruned.")
                    else:
                        errors.append(e)
    if errors:
        print(f"Found errors when pruning images for {build.id}:")
        for e in errors:
            print(e)
        raise Exception("Some errors were encountered")
387+
388+
389+
def prune_build(s3_client, bucket, prefix, build_id, dry_run):
    """Delete every S3 object under the build's directory (prefix/build_id/).

    Parameter order matches the call site in main():
    prune_build(s3_client, bucket, prefix, build_id, args.dry_run).

    s3_client: boto3 S3 *client*. The previous signature took s3_client but
    never used it and instead called bucket.objects.filter(), which only
    exists on a boto3 Bucket *resource* — here `bucket` is a bucket-name
    string, so we stick to the client API used everywhere else in this file.
    """
    build_prefix = os.path.join(prefix, f"{build_id}/")
    if dry_run:
        print(f"Would delete all resources in {bucket}/{build_prefix}.")
        return
    try:
        # list_objects_v2 returns at most 1000 keys per page; paginate and
        # batch-delete each page.
        paginator = s3_client.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=bucket, Prefix=build_prefix):
            contents = page.get("Contents", [])
            if not contents:
                continue
            s3_client.delete_objects(
                Bucket=bucket,
                Delete={"Objects": [{"Key": obj["Key"]} for obj in contents]})
        print(f"Pruned {build_id} completely from s3")
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchKey':
            print(f"{bucket}/{build_prefix} already pruned.")
        else:
            raise Exception(f"Error pruning {build_id}: {e.response['Error']['Message']}")
402+
403+
323404
if __name__ == "__main__":
324405
main()

0 commit comments

Comments
 (0)