@@ -30,7 +30,7 @@ def load_schema(filename):
    Returns
    -------
    schema : dict
-        A dictionary containing the schema for your dataset.
+        A dictionary containing the schema for your table.

    """
    with open(filename) as f:
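For context, load_schema just parses a JSON schema file into a dict. A minimal usage sketch, assuming a hypothetical schema.json laid out with a top-level "fields" list the way the other methods here expect; the field layout is illustrative, not confirmed by this diff:

# schema.json (hypothetical): {"fields": [{"name": "employee_id", "type": {"id": "Schema_Field_Type=Text"}}]}
schema = load_schema("schema.json")
print(schema["fields"][0]["name"])  # -> employee_id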
@@ -63,8 +63,8 @@ class Prism:
        The version of the Prism API to use
    """

-    def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_token, version="v1"):
-        """Init the Prism class with required attribues."""
+    def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_token, version="v2"):
+        """Init the Prism class with required attributes."""
        self.base_url = base_url
        self.tenant_name = tenant_name
        self.client_id = client_id
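For reviewers, a minimal sketch of constructing the client after the default-version bump; all credential values and the URL below are placeholders, not real endpoints:

# Placeholder credentials; the API version now defaults to "v2"
p = Prism(
    base_url="https://wd2-impl-services1.workday.com",
    tenant_name="acme",
    client_id="CLIENT_ID",
    client_secret="CLIENT_SECRET",
    refresh_token="REFRESH_TOKEN",
)
p.create_bearer_token()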
@@ -82,7 +82,6 @@ def create_bearer_token(self):

        Parameters
        ----------
-        None

        Returns
        -------
@@ -107,13 +106,13 @@ def create_bearer_token(self):
        else:
            logging.warning("HTTP Error {}".format(r.status_code))

-    def create_dataset(self, dataset_name, schema=None):
-        """Create an empty dataset of type "API".
+    def create_table(self, table_name, schema=None):
+        """Create an empty table of type "API".

        Parameters
        ----------
-        dataset_name : str
-            The dataset name. The name must be unique and conform to the name
+        table_name : str
+            The table name. The name must be unique and conform to the name
            validation rules.

        schema : list
@@ -122,7 +121,7 @@ def create_dataset(self, dataset_name, schema=None):
        Returns
        -------
        If the request is successful, a dictionary containing information about
-        the new dataset is returned.
+        the new table is returned.

        """
        url = self.prism_endpoint + "/datasets"
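A hedged usage sketch of the renamed method; the table name and schema file are illustrative. Note that the schema parameter is documented as a list, so the "fields" list is passed rather than the whole dict:

schema = load_schema("schema.json")
table = p.create_table("My_New_Table", schema=schema["fields"])
if table is not None:
    print(table["id"])  # ID of the newly created table, if the request succeeded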
@@ -132,35 +131,39 @@ def create_dataset(self, dataset_name, schema=None):
            "Content-Type": "application/json",
        }

-        data = {"name": dataset_name}
+        data = {"name": table_name}

        if schema is not None:
            data["fields"] = schema

        r = requests.post(url, headers=headers, data=json.dumps(data))

        if r.status_code == 201:
-            logging.info("Successfully created an empty API dataset")
+            logging.info("Successfully created an empty API table")
            return r.json()
        elif r.status_code == 400:
            logging.warning(r.json()["errors"][0]["error"])
        else:
            logging.warning("HTTP Error {}".format(r.status_code))

-    def create_bucket(self, schema, dataset_id, operation="Replace"):
+    def create_bucket(self, schema, table_id, operation="TruncateandInsert"):
        """Create a temporary bucket to upload files.

        Parameters
        ----------
        schema : dict
-            A dictionary containing the schema for your dataset.
+            A dictionary containing the schema for your table.

-        dataset_id : str
-            The ID of the dataset that this bucket is to be associated with.
+        table_id : str
+            The ID of the table that this bucket is to be associated with.

        operation : str
-            If not specified, defaults to "Replace" operation
-            Optional values - "Replace" or "Append"
+            Required, defaults to the "TruncateandInsert" operation
+            Additional operations - "Insert", "Update", "Upsert", "Delete"
+            When you use the Update/Upsert/Delete operation you must specify which field to use
+            as the matching key by setting the 'useAsOperationKey' attribute on that field to True.
+            Only fields marked as ExternalID, WPA_RowID, or WPA_LoadId on the table schema can be used
+            as operation keys during loads into the table.

        Returns
        -------
@@ -178,7 +181,7 @@ def create_bucket(self, schema, dataset_id, operation="Replace"):
        data = {
            "name": "prism_python_wbucket_" + str(random.randint(1000000, 9999999)),
            "operation": {"id": "Operation_Type=" + operation},
-            "targetDataset": {"id": dataset_id},
+            "targetDataset": {"id": table_id},
            "schema": schema,
        }

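To make the new operation semantics concrete, a sketch of requesting a bucket for an "Upsert" load. The bucket schema would normally come from convert_describe_schema_to_bucket_schema further down, and the flagged field must be one marked ExternalID, WPA_RowID, or WPA_LoadId; the index and table ID here are illustrative:

# Flag the matching-key field, then request an Upsert bucket
bucket_schema["fields"][0]["useAsOperationKey"] = True
bucket = p.create_bucket(bucket_schema, table["id"], operation="Upsert")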
@@ -282,55 +285,57 @@ def list_bucket(self, bucket_id=None):
        else:
            logging.warning("HTTP Error {}".format(r.status_code))

-    def list_dataset(self, dataset_id=None):
-        """Obtain details for all datasets or a given dataset.
+    def list_table(self, table_name=None):
+        """Obtain details for all tables or a given table.

        Parameters
        ----------
-        dataset_id : str
-            The ID of the dataset to obtain details about. If the default value
-            of None is specified, details regarding all datasets is returned.
+        table_name : str
+            The name of the table to obtain details about. If the default value
+            of None is specified, details regarding the first 100 tables are returned.

        Returns
        -------
        If the request is successful, a dictionary containing information about
-        the dataset is returned.
+        the table is returned.

        """
-        url = self.prism_endpoint + "/datasets"
+        url = self.prism_endpoint + "/datasets?"
+
+        if table_name is not None:
+            url = url + "name=" + table_name

-        if dataset_id is not None:
-            url = url + "/" + dataset_id
+        params = {"limit": 100}

        headers = {"Authorization": "Bearer " + self.bearer_token}

-        r = requests.get(url, headers=headers)
+        r = requests.get(url, params=params, headers=headers)

        if r.status_code == 200:
-            logging.info("Successfully obtained information about your datasets")
+            logging.info("Successfully obtained information about your tables")
            return r.json()
        else:
            logging.warning("HTTP Error {}".format(r.status_code))

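Because the URL now ends in "?", requests merges the params dict into whatever query string is already present, so both call forms below produce well-formed requests. A quick sketch, with an illustrative table name:

all_tables = p.list_table()                          # GET /datasets?limit=100
one_table = p.list_table(table_name="My_New_Table")  # GET /datasets?name=My_New_Table&limit=100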
-    def describe_dataset(self, dataset_id=None):
-        """Obtain details for for a given dataset/table
+    def describe_table(self, table_id=None):
+        """Obtain details for a given table

        Parameters
        ----------
-        dataset_id : str
-            The ID of the dataset to obtain datails about. If the default value
-            of None is specified, details regarding all datasets is returned.
+        table_id : str
+            The ID of the table to obtain details about. If the default value
+            of None is specified, details regarding all tables are returned.

        Returns
        -------
        If the request is successful, a dictionary containing information about
-        the dataset is returned.
+        the table is returned.

        """
-        url = self.prism_endpoint + "/datasets"
+        url = self.prism_endpoint + "/datasets/"

-        if dataset_id is not None:
-            url = url + "/" + dataset_id + "/describe"
+        if table_id is not None:
+            url = url + table_id + "/describe"

        headers = {"Authorization": "Bearer " + self.bearer_token}

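A short sketch chaining the renamed method into the schema converter below; the table ID is illustrative:

details = p.describe_table(table["id"])
bucket_schema = p.convert_describe_schema_to_bucket_schema(details)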
@@ -343,16 +348,16 @@ def describe_dataset(self, dataset_id=None):
            logging.warning("HTTP Error {}".format(r.status_code))

    def convert_describe_schema_to_bucket_schema(self, describe_schema):
-        """Convert schema (derived from describe dataset/table) to bucket schema
+        """Convert schema (derived from describe table) to bucket schema

        Parameters
        ----------
-        schema : dict
+        describe_schema : dict
            A dictionary containing the describe schema for your dataset.

        Returns
        -------
-        If the request is succesful, a dictionary containing the bucket schema is returned.
+        If the request is successful, a dictionary containing the bucket schema is returned.
        The results can then be passed to the create_bucket function

        """
@@ -362,6 +367,16 @@ def convert_describe_schema_to_bucket_schema(self, describe_schema):
        # in the dict that is in ['data'][0]
        fields = describe_schema["data"][0]["fields"]

+        # Create and assign useAsOperationKey field with true/false values based on externalId value
+        operation_key_false = {"useAsOperationKey": False}
+        operation_key_true = {"useAsOperationKey": True}
+
+        for i in fields:
+            if i["externalId"] is True:
+                i.update(operation_key_true)
+            else:
+                i.update(operation_key_false)
+
        # Now trim our fields data to keep just what we need
        for i in fields:
            del i["id"]
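A small worked example of what the new loop does to the describe output, with field contents abbreviated; this is what create_bucket later relies on for Update/Upsert/Delete matching keys:

fields = [{"name": "employee_id", "externalId": True},
          {"name": "salary", "externalId": False}]
# After the loop above:
# [{'name': 'employee_id', 'externalId': True, 'useAsOperationKey': True},
#  {'name': 'salary', 'externalId': False, 'useAsOperationKey': False}]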
@@ -385,9 +400,9 @@ def convert_describe_schema_to_bucket_schema(self, describe_schema):
        }

        # The footer for the load schema
-        schemaVersion = {"id": "Schema_Version=1.0"}
+        schema_version = {"id": "Schema_Version=1.0"}

        bucket_schema["fields"] = fields
-        bucket_schema["schemaVersion"] = schemaVersion
+        bucket_schema["schemaVersion"] = schema_version

        return bucket_schema
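Taken together, the renames leave the load workflow intact. A hedged end-to-end sketch, assuming the usual {"data": [...]} envelope on the list response (not shown in this diff) and omitting the unchanged upload and complete-bucket steps:

p.create_bearer_token()
table = p.list_table(table_name="My_New_Table")["data"][0]
details = p.describe_table(table["id"])
bucket_schema = p.convert_describe_schema_to_bucket_schema(details)
bucket = p.create_bucket(bucket_schema, table["id"], operation="TruncateandInsert")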