forked from data-exp-lab/girder_ythub
-
Notifications
You must be signed in to change notification settings - Fork 5
/
import_providers.py
167 lines (137 loc) · 6.11 KB
/
import_providers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import textwrap
from girder import logger
from girder.utility.model_importer import ModelImporter
from .entity import Entity
from .data_map import DataMap
from .file_map import FileMap
from .import_item import ImportItem
class ImportProvider:
    """Base class for providers that register external datasets in Girder.

    The methods that raise ``NotImplementedError`` here (``create_regex``,
    ``lookup``, ``listFiles``, ``getDatasetUID``, ``getURI``, ``import_tale``
    and ``_listRecursive``) are hooks that concrete providers must override.
    """

    # Cache for the matcher list; filled lazily by the ``regex`` property.
    _regex = None

    def __init__(self, name):
        """Store the provider name and resolve the Girder models used later.

        :param name: name of this provider; it is written to the ``provider``
            metadata field of every folder and item the provider registers.
        """
        self.name = name
        self.folderModel = ModelImporter.model('folder')
        self.itemModel = ModelImporter.model('item')
        self.fileModel = ModelImporter.model('file')

    @property
    def regex(self):
        """Regular expression(s) used to determine if provider matches url.

        Built on first access through ``create_regex()`` and cached on the
        instance. A single matcher is wrapped in a list so that callers can
        always iterate over the result.
        """
        if not self._regex:
            created = self.create_regex()
            self._regex = created if isinstance(created, list) else [created]
        return self._regex

    def create_regex(self):
        """Create and initialize regular expression used for matching.

        Must be overridden; may return one matcher or a list of matchers.
        """
        raise NotImplementedError()

    def getName(self) -> str:
        """Return the name this provider was created with."""
        return self.name

    def matches(self, entity: Entity) -> bool:
        """Return True if any of this provider's matchers matches the entity value."""
        return any(regex.match(entity.getValue()) for regex in self.regex)

    def lookup(self, entity: Entity) -> DataMap:
        """Resolve ``entity`` into a ``DataMap``. Must be overridden."""
        raise NotImplementedError()

    def listFiles(self, entity: Entity) -> FileMap:
        """Return the ``FileMap`` for ``entity``. Must be overridden."""
        raise NotImplementedError()

    def getDatasetUID(self, doc: object, user: object) -> str:
        """Given a registered object, return dataset DOI"""
        raise NotImplementedError()

    def getURI(self, doc: object, user: object) -> str:
        """Given a registered object, return a URI for it"""
        raise NotImplementedError()

    def import_tale(self, dataId: str, user: object, force=False) -> object:
        """Given a dataId import dataset as Tale"""
        raise NotImplementedError()

    def proto_tale_from_datamap(self, dataMap: DataMap, user: object, asTale: bool) -> object:
        """Build the skeleton of a Tale document describing ``dataMap``.

        :param dataMap: source of the identifier (``doi``, falling back to
            ``dataId``) and of the name used in the title.
        :param user: not used by this base implementation.
        :param asTale: selects the relation to the dataset: ``IsDerivedFrom``
            when true, ``Cites`` otherwise.
        :returns: dict with ``relatedIdentifiers``, ``title`` and ``category``.
        """
        relation = "IsDerivedFrom" if asTale else "Cites"
        related_id = [
            {
                "relation": relation,
                "identifier": dataMap.doi or dataMap.dataId,
            }
        ]
        # Dashes and underscores become spaces so that textwrap can break the
        # name on word boundaries when it cuts the title down to 30 characters.
        long_name = dataMap.name.replace('-', ' ').replace('_', ' ')
        shortened_name = textwrap.shorten(text=long_name, width=30)
        return {
            "relatedIdentifiers": related_id,
            "title": f"A Tale for \"{shortened_name}\"",
            "category": "science",
        }

    def register(self, parent: object, parentType: str, progress, user, dataMap: DataMap,
                 base_url: str = None):
        """Create folders and items for everything ``_listRecursive`` yields.

        A stack of ``(object, type)`` pairs tracks the current destination:
        FOLDER entries push the new folder (see ``_registerFolder``),
        END_FOLDER entries pop it again, and FILE entries are created inside
        whatever is on top of the stack.

        :param parent: object under which the dataset is registered.
        :param parentType: Girder type name of ``parent``.
        :param progress: passed through to ``_listRecursive``.
        :param user: user performing the registration.
        :param dataMap: supplies the ``dataId`` and ``name`` to list.
        :param base_url: passed through to ``_listRecursive``.
        :returns: ``(rootType, rootObj)`` for the first folder or item that
            was registered, or ``(None, None)`` if nothing was registered.
        :raises Exception: if an entry has an unknown type.
        """
        stack = [(parent, parentType)]
        pid = dataMap.dataId
        name = dataMap.name
        rootObj = None
        rootType = None
        for item in self._listRecursive(user, pid, name, base_url, progress=progress):
            if item.type == ImportItem.END_FOLDER:
                # Closing a folder creates nothing, so skip the root
                # bookkeeping below; falling through would read ``obj``
                # before it has ever been assigned.
                stack.pop()
                continue
            if item.type == ImportItem.FOLDER:
                (obj, objType) = self._registerFolder(stack, item, user)
            elif item.type == ImportItem.FILE:
                (obj, objType) = self._registerFile(stack, item, user)
            else:
                raise Exception('Unknown import item type: %s' % item.type)
            if rootObj is None:
                # The first object created is reported back as the root.
                rootObj = obj
                rootType = objType
        return rootType, rootObj

    def _registerFolder(self, stack, item: ImportItem, user):
        """Create (or reuse) a folder for ``item`` and make it the current parent.

        The folder is created under the top of ``stack``, tagged with the
        item's identifier and this provider's name (plus any ``item.meta``),
        and then pushed onto ``stack``.

        :returns: ``(folder, 'folder')``.
        """
        (parent, parentType) = stack[-1]
        folder = self.folderModel.createFolder(parent, item.name, description='',
                                               parentType=parentType, creator=user,
                                               reuseExisting=True)
        meta = {
            "identifier": item.identifier,
            "provider": self.name,
        }
        if item.meta:
            meta.update(item.meta)
        folder = self.folderModel.setMetadata(folder, meta)
        stack.append((folder, 'folder'))
        return (folder, 'folder')

    def _registerFile(self, stack, item: ImportItem, user):
        """Create (or reuse) an item for ``item`` and attach its file.

        If the item already has a file, nothing else is changed. Otherwise
        the item metadata is set and the file is either uploaded from local
        disk (``file://`` URLs) or registered as a link file.

        :returns: ``(gitem, 'item')``.
        """
        (parent, parentType) = stack[-1]
        gitem = self.itemModel.createItem(item.name, user, parent, reuseExisting=True)
        if self.fileModel.findOne({"itemId": gitem["_id"]}):
            logger.info(f"Item ({gitem['_id']=}, {gitem['name']=}) already has a file.")
            return (gitem, 'item')

        meta = {'provider': self.name}
        if item.identifier:
            meta['identifier'] = item.identifier
        if item.meta:
            meta.update(item.meta)
        gitem = self.itemModel.setMetadata(gitem, meta)

        if item.url and item.url.startswith('file://'):
            # Local files are uploaded into the item; the scheme prefix is
            # stripped to obtain the filesystem path.
            with open(item.url[len('file://'):], 'rb') as f:
                ModelImporter.model('upload').uploadFromFile(f, item.size, item.name, parent=gitem,
                                                             parentType='item', user=user,
                                                             mimeType=item.mimeType)
        else:
            # girder does not allow anything else than http and https. So we need a better
            # mechanism here to communicate relevant information to WTDM
            self.fileModel.createLinkFile(item.name, url=item.url, parent=gitem, parentType='item',
                                          creator=user, size=item.size, mimeType=item.mimeType,
                                          reuseExisting=True)
        return (gitem, 'item')

    def _listRecursive(self, user, pid: str, name: str, base_url: str = None, progress=None):
        """Yield the ``ImportItem`` entries consumed by ``register``. Must be overridden."""
        raise NotImplementedError()

    def check_auth(self, user):
        """Hook that does nothing in this base class.

        NOTE(review): presumably overridden by providers that need to verify
        the user's credentials -- confirm against the subclasses.
        """
        pass
class ImportProviders:
    """Registry of import providers, searchable by entity match or by name."""

    def __init__(self):
        # ``providers`` keeps registration order, which decides who wins when
        # several providers match; ``providerMap`` indexes them by name.
        self.providers = []
        self.providerMap = {}

    def addProvider(self, provider: ImportProvider):
        """Add ``provider`` to the match list and index it under its name."""
        self.providers.append(provider)
        self.providerMap[provider.name] = provider

    def getProvider(self, entity: Entity) -> ImportProvider:
        """Return the first registered provider whose ``matches`` accepts ``entity``.

        Raises ``Exception`` if no registered provider matches.
        """
        found = next(
            (candidate for candidate in self.providers if candidate.matches(entity)),
            None,
        )
        if found is None:
            raise Exception('Could not find suitable provider for entity %s' % entity)
        return found

    def getFromDataMap(self, dataMap: DataMap) -> ImportProvider:
        """Return the provider registered under the name ``dataMap.repository``."""
        repository = dataMap.repository
        return self.providerMap[repository]