@@ -30,7 +30,7 @@ def load_schema(filename):
    Returns
    -------
    schema : dict
-        A dictionary containing the schema for your dataset.
+        A dictionary containing the schema for your table.

    """
    with open(filename) as f:
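For context, load_schema just parses a JSON schema file into a dict. A minimal usage sketch, assuming a hypothetical schema.json laid out with a top-level "fields" list the way the other methods here expect; the field layout is illustrative, not confirmed by this diff:

# schema.json (hypothetical): {"fields": [{"name": "employee_id", "type": {"id": "Schema_Field_Type=Text"}}]}
schema = load_schema("schema.json")
print(schema["fields"][0]["name"])  # -> employee_id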
@@ -63,8 +63,8 @@ class Prism:
        The version of the Prism API to use
    """

-    def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_token, version="v1"):
-        """Init the Prism class with required attribues."""
+    def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_token, version="v2"):
+        """Init the Prism class with required attributes."""
        self.base_url = base_url
        self.tenant_name = tenant_name
        self.client_id = client_id
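For reviewers, a minimal sketch of constructing the client after the default-version bump; all credential values and the URL below are placeholders, not real endpoints:

# Placeholder credentials; the API version now defaults to "v2"
p = Prism(
    base_url="https://wd2-impl-services1.workday.com",
    tenant_name="acme",
    client_id="CLIENT_ID",
    client_secret="CLIENT_SECRET",
    refresh_token="REFRESH_TOKEN",
)
p.create_bearer_token()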
@@ -82,7 +82,6 @@ def create_bearer_token(self):

        Parameters
        ----------
-        None

        Returns
        -------
@@ -107,13 +106,13 @@ def create_bearer_token(self):
        else:
            logging.warning("HTTP Error {}".format(r.status_code))

-    def create_dataset(self, dataset_name, schema=None):
-        """Create an empty dataset of type "API".
+    def create_table(self, table_name, schema=None):
+        """Create an empty table of type "API".

        Parameters
        ----------
-        dataset_name : str
-            The dataset name. The name must be unique and conform to the name
+        table_name : str
+            The table name. The name must be unique and conform to the name
            validation rules.

        schema : list
@@ -122,7 +121,7 @@ def create_dataset(self, dataset_name, schema=None):
        Returns
        -------
        If the request is successful, a dictionary containing information about
-        the new dataset is returned.
+        the new table is returned.

        """
        url = self.prism_endpoint + "/datasets"
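A hedged usage sketch of the renamed method; the table name and schema file are illustrative. Note that the schema parameter is documented as a list, so the "fields" list is passed rather than the whole dict:

schema = load_schema("schema.json")
table = p.create_table("My_New_Table", schema=schema["fields"])
if table is not None:
    print(table["id"])  # ID of the newly created table, if the request succeeded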
@@ -132,35 +131,39 @@ def create_dataset(self, dataset_name, schema=None):
            "Content-Type": "application/json",
        }

-        data = {"name": dataset_name}
+        data = {"name": table_name}

        if schema is not None:
            data["fields"] = schema

        r = requests.post(url, headers=headers, data=json.dumps(data))

        if r.status_code == 201:
-            logging.info("Successfully created an empty API dataset")
+            logging.info("Successfully created an empty API table")
            return r.json()
        elif r.status_code == 400:
            logging.warning(r.json()["errors"][0]["error"])
        else:
            logging.warning("HTTP Error {}".format(r.status_code))

-    def create_bucket(self, schema, dataset_id, operation="Replace"):
+    def create_bucket(self, schema, table_id, operation="TruncateandInsert"):
        """Create a temporary bucket to upload files.

        Parameters
        ----------
        schema : dict
-            A dictionary containing the schema for your dataset.
+            A dictionary containing the schema for your table.

-        dataset_id : str
-            The ID of the dataset that this bucket is to be associated with.
+        table_id : str
+            The ID of the table that this bucket is to be associated with.

        operation : str
-            If not specified, defaults to "Replace" operation
-            Optional values - "Replace" or "Append"
+            Required, defaults to the "TruncateandInsert" operation
+            Additional operations - "Insert", "Update", "Upsert", "Delete"
+            When you use the Update/Upsert/Delete operation you must specify which field to use
+            as the matching key by setting the 'useAsOperationKey' attribute on that field to True.
+            Only fields marked as ExternalID, WPA_RowID, or WPA_LoadId on the table schema can be used
+            as operation keys during loads into the table.

        Returns
        -------
@@ -178,7 +181,7 @@ def create_bucket(self, schema, dataset_id, operation="Replace"):
        data = {
            "name": "prism_python_wbucket_" + str(random.randint(1000000, 9999999)),
            "operation": {"id": "Operation_Type=" + operation},
-            "targetDataset": {"id": dataset_id},
+            "targetDataset": {"id": table_id},
            "schema": schema,
        }

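To make the new operation semantics concrete, a sketch of requesting a bucket for an "Upsert" load. The bucket schema would normally come from convert_describe_schema_to_bucket_schema further down, and the flagged field must be one marked ExternalID, WPA_RowID, or WPA_LoadId; the index and table ID here are illustrative:

# Flag the matching-key field, then request an Upsert bucket
bucket_schema["fields"][0]["useAsOperationKey"] = True
bucket = p.create_bucket(bucket_schema, table["id"], operation="Upsert")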
@@ -282,55 +285,57 @@ def list_bucket(self, bucket_id=None):
        else:
            logging.warning("HTTP Error {}".format(r.status_code))

-    def list_dataset(self, dataset_id=None):
-        """Obtain details for all datasets or a given dataset.
+    def list_table(self, table_name=None):
+        """Obtain details for all tables or a given table.

        Parameters
        ----------
-        dataset_id : str
-            The ID of the dataset to obtain details about. If the default value
-            of None is specified, details regarding all datasets is returned.
+        table_name : str
+            The name of the table to obtain details about. If the default value
+            of None is specified, details regarding the first 100 tables are returned.

        Returns
        -------
        If the request is successful, a dictionary containing information about
-        the dataset is returned.
+        the table is returned.

        """
-        url = self.prism_endpoint + "/datasets"
+        url = self.prism_endpoint + "/datasets?"
+
+        if table_name is not None:
+            url = url + "name=" + table_name

-        if dataset_id is not None:
-            url = url + "/" + dataset_id
+        params = {"limit": 100}

        headers = {"Authorization": "Bearer " + self.bearer_token}

-        r = requests.get(url, headers=headers)
+        r = requests.get(url, params=params, headers=headers)

        if r.status_code == 200:
-            logging.info("Successfully obtained information about your datasets")
+            logging.info("Successfully obtained information about your tables")
            return r.json()
        else:
            logging.warning("HTTP Error {}".format(r.status_code))

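Because the URL now ends in "?", requests merges the params dict into whatever query string is already present, so both call forms below produce well-formed requests. A quick sketch, with an illustrative table name:

all_tables = p.list_table()                          # GET /datasets?limit=100
one_table = p.list_table(table_name="My_New_Table")  # GET /datasets?name=My_New_Table&limit=100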
-    def describe_dataset(self, dataset_id=None):
-        """Obtain details for for a given dataset/table
+    def describe_table(self, table_id=None):
+        """Obtain details for a given table

        Parameters
        ----------
-        dataset_id : str
-            The ID of the dataset to obtain datails about. If the default value
-            of None is specified, details regarding all datasets is returned.
+        table_id : str
+            The ID of the table to obtain details about. If the default value
+            of None is specified, details regarding all tables are returned.

        Returns
        -------
        If the request is successful, a dictionary containing information about
-        the dataset is returned.
+        the table is returned.

        """
-        url = self.prism_endpoint + "/datasets"
+        url = self.prism_endpoint + "/datasets/"

-        if dataset_id is not None:
-            url = url + "/" + dataset_id + "/describe"
+        if table_id is not None:
+            url = url + table_id + "/describe"

        headers = {"Authorization": "Bearer " + self.bearer_token}

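A short sketch chaining the renamed method into the schema converter below; the table ID is illustrative:

details = p.describe_table(table["id"])
bucket_schema = p.convert_describe_schema_to_bucket_schema(details)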
@@ -343,16 +348,16 @@ def describe_dataset(self, dataset_id=None):
            logging.warning("HTTP Error {}".format(r.status_code))

    def convert_describe_schema_to_bucket_schema(self, describe_schema):
-        """Convert schema (derived from describe dataset/table) to bucket schema
+        """Convert schema (derived from describe table) to bucket schema

        Parameters
        ----------
-        schema : dict
+        describe_schema : dict
            A dictionary containing the describe schema for your dataset.

        Returns
        -------
-        If the request is succesful, a dictionary containing the bucket schema is returned.
+        If the request is successful, a dictionary containing the bucket schema is returned.
        The results can then be passed to the create_bucket function

        """
@@ -362,6 +367,16 @@ def convert_describe_schema_to_bucket_schema(self, describe_schema):
        # in the dict that is in ['data'][0]
        fields = describe_schema["data"][0]["fields"]

+        # Create and assign useAsOperationKey field with true/false values based on externalId value
+        operation_key_false = {"useAsOperationKey": False}
+        operation_key_true = {"useAsOperationKey": True}
+
+        for i in fields:
+            if i["externalId"] is True:
+                i.update(operation_key_true)
+            else:
+                i.update(operation_key_false)
+
        # Now trim our fields data to keep just what we need
        for i in fields:
            del i["id"]
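A small worked example of what the new loop does to the describe output, with field contents abbreviated; this is what create_bucket later relies on for Update/Upsert/Delete matching keys:

fields = [{"name": "employee_id", "externalId": True},
          {"name": "salary", "externalId": False}]
# After the loop above:
# [{'name': 'employee_id', 'externalId': True, 'useAsOperationKey': True},
#  {'name': 'salary', 'externalId': False, 'useAsOperationKey': False}]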
@@ -385,9 +400,9 @@ def convert_describe_schema_to_bucket_schema(self, describe_schema):
        }

        # The footer for the load schema
-        schemaVersion = {"id": "Schema_Version=1.0"}
+        schema_version = {"id": "Schema_Version=1.0"}

        bucket_schema["fields"] = fields
-        bucket_schema["schemaVersion"] = schemaVersion
+        bucket_schema["schemaVersion"] = schema_version

        return bucket_schema
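Taken together, the renames leave the load workflow intact. A hedged end-to-end sketch, assuming the usual {"data": [...]} envelope on the list response (not shown in this diff) and omitting the unchanged upload and complete-bucket steps:

p.create_bearer_token()
table = p.list_table(table_name="My_New_Table")["data"][0]
details = p.describe_table(table["id"])
bucket_schema = p.convert_describe_schema_to_bucket_schema(details)
bucket = p.create_bucket(bucket_schema, table["id"], operation="TruncateandInsert")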