-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
73 lines (63 loc) · 2.16 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from typing import List
from time import sleep
from dhapi import get_endpoint
from models import *
from sys import argv
import os
import json
def json_dump_dir_creator(endpoint: str) -> str:
    """Create the JSON dump directory for ``endpoint`` and return its path.

    The directory is named ``<endpoint>_dumps``. Missing parent directories
    are created as well, and an already existing directory is reused instead
    of raising an error, so the function can safely be called repeatedly.

    Args:
        endpoint: Endpoint string the dump directory is named after.

    Returns:
        The path of the dump directory, i.e. ``f'{endpoint}_dumps'``.
    """
    path = f'{endpoint}_dumps'
    # exist_ok=True keeps re-runs from failing on a directory left by an
    # earlier run.
    os.makedirs(path, exist_ok=True)
    return path
def get_image_dumps(
        endpoint: str,
        tags: List[TagSchema],
        repo_name: str,
        dump_path: str):
    """Dump the image data of every tag into its own JSON file.

    For each tag, ``<endpoint>/<tag name>/images`` is requested and the
    response is written (indented JSON) to a file inside ``dump_path`` named
    ``<dump_path>_<repo_name>_<tag name>.json``, so it can be searched later.
    Path separators occurring in that file name are replaced by ``_``.

    Args:
        endpoint: Base endpoint; the tag name and ``/images`` are appended.
        tags: Tags to fetch image data for; only their ``name`` is read.
        repo_name: Repository name, used as part of the dump file name.
        dump_path: Directory that receives the JSON files. It is not created
            here, so it must already exist.
    """
    for tag in tags:
        tag_name = tag.name
        params = ApiParameters()
        response = get_endpoint(f'{endpoint}/{tag_name}/images', params)
        # Pause after every request (presumably to stay below the API rate
        # limit -- confirm against the API's documentation).
        sleep(1)
        file_name = f"{dump_path}_{repo_name}_{tag_name}.json"
        # dump_path is embedded in the file name; if it contains a path
        # separator the name would point into a non-existent sub-directory
        # and open() would fail, so separators are flattened to "_".
        for separator in filter(None, (os.sep, os.altsep)):
            file_name = file_name.replace(separator, "_")
        with open(os.path.join(dump_path, file_name), 'w') as f:
            json.dump(response, f, indent=4)
def get_all_tags(
        endpoint: str,
        repositories: List[RepositorySchema],
        dump_path: str):
    """Fetch the tags of every repository and dump their image data.

    For each repository the tags endpoint ``<endpoint>/<repo name>/tags`` is
    queried through ``get_all_paginated_results`` and the resulting tags are
    handed to ``get_image_dumps`` together with the repository name.

    Args:
        endpoint: Base endpoint the repository names are appended to.
        repositories: Repositories to process; only their ``name`` is read.
        dump_path: Directory passed through to ``get_image_dumps``.
    """
    for repository in repositories:
        tags_url = f'{endpoint}/{repository.name}/tags'
        found_tags = get_all_paginated_results(tags_url)
        get_image_dumps(tags_url, found_tags, repository.name, dump_path)
def get_all_paginated_results(endpoint: str) -> List[ApiResponse]:
    """Collect the results of a paginated endpoint.

    The first page is always requested. For endpoints whose last path
    segment is ``tags`` the page size is set to 5 and only that first page is
    returned. For every other endpoint the following pages are requested
    (one second pause after each request) for as long as the response
    reports a ``next`` page.

    Args:
        endpoint: Endpoint to query.

    Returns:
        The ``results`` list of the first response, extended in place with
        the ``results`` of every further page that was fetched.
    """
    is_tags_endpoint = endpoint.split('/')[-1] == "tags"

    params = ApiParameters()
    if is_tags_endpoint:
        params.page_size = 5

    response = get_endpoint(endpoint, params)
    collected = response.results
    current_page = 1
    # Some repositories have something like 10000 tags, so tags endpoints
    # stop after the first page instead of following ``next``.
    while response.next and not is_tags_endpoint:
        current_page += 1
        params.page = current_page
        response = get_endpoint(endpoint, params)
        sleep(1)
        collected.extend(response.results)
    return collected
if __name__ == "__main__":
    # Script entry point: the single command-line argument is the endpoint
    # whose repositories, tags and image data are dumped to JSON files.
    if len(argv) < 2:
        # Exit with a readable usage message instead of a bare IndexError
        # when the endpoint argument is missing.
        raise SystemExit(f"usage: {argv[0]} <endpoint>")
    endpoint = argv[1]
    dump_path = json_dump_dir_creator(endpoint)
    # The results of the top-level endpoint are passed on as the
    # repositories whose tags get dumped.
    images = get_all_paginated_results(endpoint)
    get_all_tags(endpoint, images, dump_path)