-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathcontent.py
320 lines (252 loc) · 10.4 KB
/
content.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
"""Content that can be loaded into Stardog.
"""
import contextlib
import os
import uuid
from typing import Optional
import requests
from . import content_types as content_types
class Content:
    """Common base class for every kind of loadable content.

    It defines no behavior of its own; it only provides a shared parent type
    for the concrete content classes.
    """
class Raw(Content):
    """Content supplied directly by the caller as an in-memory object."""

    def __init__(
        self,
        content: object,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        name: Optional[str] = None,
    ):
        """Initializes a Raw object.

        :param content: Object representing the content (e.g., str, file)
        :param content_type: Content type.
            Falls back to the type guessed from ``name`` when not provided.
        :param content_encoding: Content encoding.
            Falls back to the encoding guessed from ``name`` when not provided.
        :param name: Object name

        Examples:
            >>> Raw(':luke a :Human', 'text/turtle', name='data.ttl')
            >>> Raw(':βüãäoñr̈ a :Employee .'.encode('utf-8'), 'text/turtle')
        """
        guessed_encoding, guessed_type = content_types.guess_rdf_format(name)

        self.raw = content
        self.name = name
        # Explicit arguments always win over the values guessed from ``name``.
        self.content_type = content_type or guessed_type
        self.content_encoding = content_encoding or guessed_encoding

    @contextlib.contextmanager
    def data(self):
        """Yield the content object exactly as it was passed to the constructor."""
        yield self.raw
class File(Content):
    """File-based content."""

    def __init__(
        self,
        file: Optional[str] = None,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        name: Optional[str] = None,
        fname: Optional[str] = None,
    ):
        """Initializes a File object.

        :param file: the filename/path of the file
        :param content_type: Content type.
            It will be automatically detected from the filename
        :param content_encoding: Content encoding.
            It will be automatically detected from the filename
        :param name: Name of the file object.
            It will be automatically detected from the filename
        :param fname: backward compatible parameter for ``file``;
            takes precedence over ``file`` when both are given
        :raises AssertionError: if neither ``file`` nor ``fname`` is provided

        Examples:
            >>> File('data.ttl')
            >>> File('data.doc', 'application/msword')
        """
        # ``file`` has a special meaning in IDEs such as PyCharm, which show a
        # file picker for it. That helps the user find the file, which matters
        # for this kind of call, but we must stay backward compatible with
        # callers that still pass ``fname=``.
        if fname:
            file = fname

        # Raised explicitly instead of using an ``assert`` statement so the
        # check is not stripped under ``python -O``; the exception type and
        # message are kept for existing callers.
        if not file:
            raise AssertionError("Parameter file is required")

        self.fname = file
        (c_enc, c_type) = content_types.guess_rdf_format(file)
        # Explicit arguments always win over the values guessed from the path.
        self.content_type = content_type if content_type else c_type
        self.content_encoding = content_encoding if content_encoding else c_enc
        self.name = name if name else os.path.basename(file)

    @contextlib.contextmanager
    def data(self):
        """Open the file in binary mode and yield the handle; it is closed on exit."""
        with open(self.fname, "rb") as f:
            yield f
class MappingRaw(Content):
    """Mapping supplied directly by the caller as a string."""

    def __init__(
        self, content: str, syntax: Optional[str] = None, name: Optional[str] = None
    ):
        """Initializes a MappingRaw object.

        :param content: the actual mapping content (e.g. ``'MAPPING\\n FROM SQL ...'``)
        :param syntax: The mapping syntax (``'STARDOG'``, ``'R2RML'``, or ``'SMS2'``)
            If not provided, it will try to detect it from ``name`` if provided, otherwise from the content itself
        :param name: name of object

        Examples:
            >>> mapping = '''
            MAPPING
            FROM SQL {
            SELECT *
            FROM `benchmark`.`person`
            }
            TO {
            ?subject rdf:type :person
            } WHERE {
            BIND(template("http://api.stardog.com/person/nr={nr}") AS ?subject)
            }
            '''
            >>> MappingRaw(mapping)
        """
        self.raw = content
        self.name = name

        # Try the name first; inspect the content only when the name is
        # missing or yields no result.
        detected = content_types.guess_mapping_format(name) if name else None
        if detected is None:
            detected = content_types.guess_mapping_format_from_content(content)

        # An explicitly passed syntax always wins over the detected one.
        self.syntax = syntax or detected

    @contextlib.contextmanager
    def data(self):
        """Yield the mapping content exactly as it was passed to the constructor."""
        yield self.raw
class MappingFile(Content):
    """File-based mapping."""

    def __init__(
        self, file: str, syntax: Optional[str] = None, name: Optional[str] = None
    ):
        """Initializes a MappingFile object.

        :param file: the filename/path of the file
        :param syntax: The mapping syntax (``'STARDOG'``, ``'R2RML'``, or ``'SMS2'``)
            If not provided, it will try to detect it from the ``file``'s extension.
        :param name: the name of the object. If not provided, will fall back to the basename of the ``file``.

        Examples:
            >>> MappingFile('data.sms')
            >>> MappingFile('data.sms2')
            >>> MappingFile('data.rq')
            >>> MappingFile('data.r2rml')
        """
        self.fname = file
        # The guess from the file name is only computed when no explicit
        # syntax was given.
        self.syntax = syntax or content_types.guess_mapping_format(file)
        self.name = name or os.path.basename(file)

    @contextlib.contextmanager
    def data(self):
        """Open the mapping file in binary mode and yield the handle; it is closed on exit."""
        with open(self.fname, "rb") as handle:
            yield handle
class ImportRaw(Content):
    """Delimited or JSON import content supplied directly by the caller."""

    def __init__(
        self,
        content: object,
        input_type: Optional[str] = None,
        separator: Optional[str] = None,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        name: Optional[str] = None,
        iri: Optional[str] = None,
    ):
        """Initializes an ImportRaw object.

        :param content: Object representing the content (e.g., str, file)
        :param input_type: ``'DELIMITED'`` or ``'JSON'``
        :param separator: Required if ``input_type`` is ``'DELIMITED'``. Use ``','`` for a CSV. Use ``\\\\t`` for a TSV.
        :param content_type: Content type
        :param content_encoding: Content encoding
        :param name: Object name
        :param iri: IRI that uniquely identifies this content.
            It will default to "file://``name``" if omitted, or to a random
            "stream://<uuid4>" when ``name`` is not given either.

        .. note::
            if ``name`` is provided like a pseudo filename (i.e. ``'data.csv'``, ``'data.tsv'``, or ``'data.json'``), it will auto-detect most
            required parameters (``input_type``, ``separator``, ``content_type``, ``content_encoding``) - otherwise you must specify them.

        Examples:
            >>> ImportRaw('a,b,c', name='data.csv')
            >>> ImportRaw('a\tb\tc', name='data.tsv')
            >>> ImportRaw({'foo':'bar'}, name='data.json')
        """
        self.raw = content
        self.name = name

        (
            guessed_encoding,
            guessed_type,
            guessed_input_type,
            guessed_separator,
        ) = content_types.guess_import_format(name)

        # Explicit arguments always win over the values guessed from ``name``.
        self.content_type = content_type or guessed_type
        self.content_encoding = content_encoding or guessed_encoding
        self.input_type = input_type or guessed_input_type
        self.separator = separator or guessed_separator

        # IRI precedence: explicit value, then the name, then a fresh random
        # identifier for anonymous content.
        if iri:
            self.iri = iri
        elif name:
            self.iri = f"file://{name}"
        else:
            self.iri = f"stream://{uuid.uuid4()}"

    @contextlib.contextmanager
    def data(self):
        """Yield the content object exactly as it was passed to the constructor."""
        yield self.raw
class ImportFile(Content):
    """File-based import content for delimited and JSON files."""

    def __init__(
        self,
        file: str,
        input_type: Optional[str] = None,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        separator: Optional[str] = None,
        name: Optional[str] = None,
        iri: Optional[str] = None,
    ):
        """Initializes an ImportFile object.

        :param file: filename/path of the file
        :param input_type: ``'DELIMITED'`` or ``'JSON'``
        :param content_type: Content type
        :param content_encoding: Content encoding
        :param separator: Required if ``input_type`` is ``'DELIMITED'``. Use ``','`` for a CSV. Use ``\\\\t`` for a TSV.
        :param name: Object name.
            It will be automatically detected from the ``file`` if omitted.
        :param iri: IRI that uniquely identifies this file.
            It will default to "file://``name``" if omitted.

        .. note::
            If ``file`` has a recognized extension (i.e. ``'data.csv'``, ``'data.tsv'``, or ``'data.json'``), it will auto-detect most
            required parameters (``input_type``, ``separator``, ``content_type``, ``content_encoding``) - otherwise you must specify them.

        Examples:
            >>> ImportFile('data.csv')
            >>> ImportFile('data.tsv')
            >>> ImportFile('data.txt','DELIMITED',"\\\\t" )
            >>> ImportFile('data.json')
        """
        self.fname = file

        (
            guessed_encoding,
            guessed_type,
            guessed_input_type,
            guessed_separator,
        ) = content_types.guess_import_format(file)

        # Explicit arguments always win over the values guessed from ``file``.
        self.content_type = content_type or guessed_type
        self.content_encoding = content_encoding or guessed_encoding
        self.input_type = input_type or guessed_input_type
        self.separator = separator or guessed_separator

        # The default IRI is built from the resolved name, so set the name first.
        self.name = name or os.path.basename(file)
        self.iri = iri or f"file://{self.name}"

    @contextlib.contextmanager
    def data(self):
        """Open the file in binary mode and yield the handle; it is closed on exit."""
        with open(self.fname, "rb") as handle:
            yield handle
class URL(Content):
    """Url-based content."""

    def __init__(
        self,
        url: str,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        name: Optional[str] = None,
    ):
        """Initializes a URL object.

        :param url: URL to the content
        :param content_type: Content type.
            It will be automatically detected from the ``url`` if not provided.
        :param content_encoding: Content encoding.
            It will be automatically detected from the ``url`` if not provided.
        :param name: Object name.
            It will be automatically detected from the ``url`` if not provided.

        Examples:
            >>> URL('http://example.com/data.ttl')
            >>> URL('http://example.com/data.doc', 'application/msword')
        """
        self.url = url
        (c_enc, c_type) = content_types.guess_rdf_format(url)
        # Explicit arguments always win over the values guessed from the URL.
        self.content_type = content_type if content_type else c_type
        self.content_encoding = content_encoding if content_encoding else c_enc
        self.name = name if name else os.path.basename(url)

    @contextlib.contextmanager
    def data(self):
        """Download the URL and yield the response body as bytes.

        :raises requests.HTTPError: if the server answers with a 4xx/5xx status
        """
        with requests.get(self.url, stream=True) as r:
            # Fail fast on HTTP errors so that an error page is never handed
            # on as if it were the requested content.
            r.raise_for_status()
            yield r.content