forked from data-exp-lab/girder_ythub
-
Notifications
You must be signed in to change notification settings - Fork 5
/
data_map.py
92 lines (85 loc) · 2.6 KB
/
data_map.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from dataclasses import dataclass
from typing import Dict, List, Optional
# Schema-style description (type / properties / required / example) of a
# single data-map entry. It is assembled from named parts so each section can
# be read on its own.
# NOTE(review): presumably consumed by the REST API documentation layer --
# confirm against the modules that import ``dataMapDoc``.

# Per-field definitions, in the order they are exposed.
_DATA_MAP_PROPERTIES = {
    "dataId": {
        "type": "string",
        "description": "External dataset identificator, such as URL.",
    },
    "repository": {
        "type": "string",
        "description": "Name of a data repository holding the dataset.",
    },
    "doi": {
        "type": "string",
        "description": "Digital Object Identifier",
    },
    "name": {
        "type": "string",
        "description": (
            "A user-friendly name. "
            "Defaults to the name provided by an external repository."
        ),
    },
    "size": {
        "type": "integer",
        "description": "Size of the dataset in bytes.",
    },
    "tale": {
        "type": "boolean",
        "description": "If True, external data resource is a Tale",
    },
    "base_url": {
        "type": "string",
        "description": "Optional CN url for DataONE datasets",
    },
}

# A sample entry illustrating every field, including the optional ones.
_DATA_MAP_EXAMPLE = {
    "base_url": "https://cn.dataone.org/cn/v2",
    "dataId": "urn:uuid:42969280-e11c-41a9-92dc-33964bf785c8",
    "doi": "10.5063/F1Z899CZ",
    "name": (
        "Data from a dynamically downscaled projection of "
        "past and future microclimates covering North America "
        "from 1980-1999 and 2080-2099"
    ),
    "repository": "DataONE",
    "size": 178679,
    "tale": False,
}

dataMapDoc = {
    "type": "object",
    "description": (
        "A container with a basic information "
        "about a set of external data resources."
    ),
    "properties": _DATA_MAP_PROPERTIES,
    # "tale" and "base_url" are the only properties not listed as required.
    "required": ["dataId", "repository", "doi", "name", "size"],
    "example": _DATA_MAP_EXAMPLE,
}
@dataclass
class DataMap:
    """Basic information about one external data resource.

    The fields mirror the properties described by ``dataMapDoc``. Only
    ``dataId`` and ``size`` are mandatory when constructing an instance
    directly; every other field has a default.

    Attributes:
        dataId: External dataset identifier, such as a URL.
        size: Size of the dataset in bytes.
        doi: Digital Object Identifier, or ``None`` when not provided.
        name: User-friendly name of the dataset.
        repository: Name of the data repository holding the dataset.
        tale: ``True`` when the external data resource is a Tale.
        base_url: Optional CN url for DataONE datasets.
    """

    dataId: str
    size: int
    doi: Optional[str] = None
    name: Optional[str] = None
    repository: Optional[str] = None
    tale: bool = False
    base_url: Optional[str] = None

    def toDict(self) -> Dict:
        """Return this record as a plain dictionary.

        ``base_url`` is emitted only when it holds a truthy value, so
        records without one serialize without that key.
        """
        ret = {
            "dataId": self.dataId,
            "size": self.size,
            "repository": self.repository,
            "doi": self.doi,
            "name": self.name,
            "tale": self.tale,
        }
        if self.base_url:
            ret["base_url"] = self.base_url
        return ret

    @classmethod
    def fromDict(cls, d: Dict) -> "DataMap":
        """Build an instance from a dictionary.

        Args:
            d: Mapping that must contain ``"dataId"`` and ``"repository"``.
                Missing optional keys fall back to defaults: ``size`` to
                ``0``, ``name`` to ``"Unknown Dataset"``, ``tale`` to
                ``False``, and ``doi`` / ``base_url`` to ``None``.

        Returns:
            A new instance of the class the method was called on, so
            subclasses get instances of their own type.

        Raises:
            KeyError: If ``"dataId"`` or ``"repository"`` is absent.
        """
        return cls(
            d["dataId"],
            d.get("size", 0),
            repository=d["repository"],
            doi=d.get("doi"),
            name=d.get("name", "Unknown Dataset"),
            tale=d.get("tale", False),
            base_url=d.get("base_url"),
        )

    @classmethod
    def fromList(cls, d: List[Dict]) -> List["DataMap"]:
        """Convert a list of dictionaries into a list of instances.

        Each element is converted with :meth:`fromDict`; an empty list
        yields an empty list.
        """
        return [cls.fromDict(x) for x in d]