#!/usr/bin/env python
"""Helper to re-extract metadata and update (mint new) assets with that metadata
Composed by Satra (with only little changes by yoh).
Initially based on code in dandisets' backups2datalad.py code for updating
as a part of that script but it was intefering with the updates to datalad thus
extracted into a separate script.
"""
import logging
import os
import sys
from getpass import getpass

import click
import requests
from dandischema.consts import DANDI_SCHEMA_VERSION

from dandi.dandiapi import DandiAPIClient
from dandi.metadata import get_default_metadata, nwb2asset
from dandi.misctypes import Digest
from dandi.support.digests import get_digest

logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s - %(message)s",
)
ul = logging.getLogger("UL")

# location on drogon
blobdir = "/mnt/backup/dandi/dandiarchive-s3-backup/blobs"
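# Blobs in the local backup are sharded by blob_id, i.e. a given blob lives at
#   <blobdir>/<blob_id[:3]>/<blob_id[3:6]>/<blob_id>
# (this is how localpath is constructed in process_dandiset below).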


def get_meta(path, digest=None):
    """Extract asset metadata from an NWB file, falling back to default metadata."""
    try:
        if digest is None:
            digest = get_digest(path, digest="dandi-etag")
        localmeta = nwb2asset(path, digest=Digest.dandi_etag(digest))
    except Exception as e:
        ul.error(f"Error {e} getting {path}")
        localmeta = get_default_metadata(path, digest=Digest.dandi_etag(digest))
    return localmeta.json_dict()


@click.command(help="""Update asset metadata""")
@click.option(
    "-d",
    "--dandiset",
    type=str,
    help="Dandiset to update",
)
@click.option(
    "-u", "--update", default=False, is_flag=True, help="Whether to actually update"
)
@click.option(
    "-a",
    "--api_key",
    type=str,
    default=os.environ.get("DANDI_API_KEY", ""),
    help="API key to use",
)
def process_dandiset(dandiset, update, api_key):
    api_key = api_key or getpass("API KEY: ")
    dapi = DandiAPIClient()
    url = "https://api.dandiarchive.org/api/blobs/digest/"
    headers = {"Accept": "application/json", "Content-Type": "application/json"}
    ds = dapi.get_dandiset(dandiset, "draft")
    ul.info(f"processing dandiset: {dandiset}")

    # Collect all assets of the draft version and resolve their blob_id and size
    # via the blobs/digest endpoint.
    assets = []
    for ra in ds.get_assets():
        asset = ra.get_raw_metadata()
        asset.update(**ra.json_dict())
        payload = {
            "algorithm": "dandi:dandi-etag",
            "value": asset["digest"]["dandi:dandi-etag"],
        }
        response = requests.request("POST", url, json=payload, headers=headers)
        blob_info = response.json()
        assert asset["contentSize"] == blob_info["size"]
        asset.update(**blob_info)
        assets.append(asset)
    ul.info(f"Total assets: {len(assets)}")

    # Verify that every blob is present in the local backup before updating anything.
    missing = []
    couldnotupdate = []
    for asset in assets:
        blob_id = asset["blob_id"]
        localpath = f"{blobdir}/{blob_id[:3]}/{blob_id[3:6]}/{blob_id}"
        if not os.path.exists(localpath):
            missing.append(asset)
    if missing:
        ul.error(f"could not find these blobs locally: {missing}")
        ul.error(f"Total missing: {len(missing)}")
    else:
        for asset in assets:
            blob_id = asset["blob_id"]
            localpath = f"{blobdir}/{blob_id[:3]}/{blob_id[3:6]}/{blob_id}"
            # Only re-extract metadata for assets not yet on the current schema version.
            if DANDI_SCHEMA_VERSION != asset.get("schemaVersion", ""):
                ul.info("Getting metadata")
                localmeta = get_meta(localpath, asset["digest"]["dandi:dandi-etag"])
                ul.info("Finished getting metadata")
                localmeta["path"] = asset["path"]
                localmeta["blobDateModified"] = asset["blobDateModified"]
                url = (
                    f"https://api.dandiarchive.org/api/dandisets/{dandiset}/"
                    f"versions/draft/assets/{asset['asset_id']}/"
                )
                payload = {"metadata": localmeta, "blob_id": asset["blob_id"]}
                if update:
                    ul.info(f"updating: {asset['path']}")
                    # print(url, payload)
                    headers["Authorization"] = f"token {api_key}"
                    response = requests.request(
                        "PUT", url, json=payload, headers=headers
                    )
                    ul.info("Finished updating")
                    if not response.ok:
                        ul.error(f"{url}: {response.text}")
                        couldnotupdate.append(asset)
        ul.error(f"couldnotupdate: {couldnotupdate}")

        # Re-fetch the assets to report how many were minted anew.
        newassets = []
        for ra in ds.get_assets():
            asset = ra.get_raw_metadata()
            asset.update(**ra.json_dict())
            newassets.append(asset)
        ul.info(f"New assets: {len(newassets)}")


if __name__ == "__main__":
    process_dandiset()