Commit e09669a
Author: Dario Varotto
Parent: 8f0b4f6

Text analytics: retry_on_too_early on /metadata

File tree: 5 files changed, +98 −94 lines

CHANGELOG.md

Lines changed: 3 additions & 0 deletions

@@ -1,5 +1,8 @@
 # Changelog
 
+## v1.0.47 (2021-04-12)
+Text analytics: Handle retry if too early on /metadata endpoint
+
 ## v1.0.46 (2021-04-05)
 Compatibility with RavenPack Edge
 

README.rst

Lines changed: 79 additions & 79 deletions

@@ -11,7 +11,7 @@ Installation
 
 ::
 
-    pip install ravenpackapi
+   pip install ravenpackapi
 
 About
 -----
@@ -24,7 +24,7 @@ Usage
 -----
 
 In order to be able to use the RavenPack API you will need an API KEY.
-If you dont already have one please contact your `customer
+If you don't already have one please contact your `customer
 support <mailto:sales@ravenpack.com>`__ representative.
 
 To begin using the API you will need to instantiate an API object that
@@ -35,9 +35,9 @@ environment variable or set it in your code:
 
 .. code:: python
 
-    from ravenpackapi import RPApi
+   from ravenpackapi import RPApi
 
-    api = RPApi(api_key="YOUR_API_KEY")
+   api = RPApi(api_key="YOUR_API_KEY")
 
 Creating a new dataset
 ~~~~~~~~~~~~~~~~~~~~~~
@@ -47,19 +47,19 @@ API with a Dataset instance.
 
 .. code:: python
 
-    from ravenpackapi import Dataset
+   from ravenpackapi import Dataset
 
-    ds = api.create_dataset(
-        Dataset(
-            name="New Dataset",
-            filters={
-                "relevance": {
-                    "$gte": 90
-                }
-            },
-        )
-    )
-    print("Dataset created", ds)
+   ds = api.create_dataset(
+       Dataset(
+           name="New Dataset",
+           filters={
+               "relevance": {
+                   "$gte": 90
+               }
+           },
+       )
+   )
+   print("Dataset created", ds)
 
 Getting data from the datasets
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -71,10 +71,10 @@ Here is how you may get a dataset definition for a pre-existing dataset
 
 .. code:: python
 
-    # Get the dataset description from the server, here we use 'us30'
-    # one of RavenPack public datasets with the top30 companies in the US
+   # Get the dataset description from the server, here we use 'us30'
+   # one of RavenPack public datasets with the top30 companies in the US
 
-    ds = api.get_dataset(dataset_id='us30')
+   ds = api.get_dataset(dataset_id='us30')
 
 Downloads: json
 ^^^^^^^^^^^^^^^
@@ -85,13 +85,13 @@ use the asynchronous datafile endpoint instead.
 
 .. code:: python
 
-    data = ds.json(
-        start_date='2018-01-05 18:00:00',
-        end_date='2018-01-05 18:01:00',
-    )
+   data = ds.json(
+       start_date='2018-01-05 18:00:00',
+       end_date='2018-01-05 18:01:00',
+   )
 
-    for record in data:
-        print(record)
+   for record in data:
+       print(record)
 
 Json queries are limited to
 
@@ -110,13 +110,13 @@ some time to complete.
 
 .. code:: python
 
-    job = ds.request_datafile(
-        start_date='2018-01-05 18:00:00',
-        end_date='2018-01-05 18:01:00',
-    )
+   job = ds.request_datafile(
+       start_date='2018-01-05 18:00:00',
+       end_date='2018-01-05 18:01:00',
+   )
 
-    with open('output.csv') as fp:
-        job.save_to_file(filename=fp.name)
+   with open('output.csv') as fp:
+       job.save_to_file(filename=fp.name)
 
 Streaming real-time data
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -132,64 +132,64 @@ You can find a `real-time streaming example
 here <ravenpackapi/examples/get_realtime_news.py>`__.
 
 The Result object handles the conversion of various fields into the
-appropriate type, i.e. \ ``record.timestamp_utc`` will be converted to
+appropriate type, i.e. ``record.timestamp_utc`` will be converted to
 ``datetime``
 
 Entity mapping
 ~~~~~~~~~~~~~~
 
-The entity mapping endpoint allow you to find the RP_ENTITY_ID mapped to
-your universe of entities.
+The entity mapping endpoint allow you to find the RP\_ENTITY\_ID mapped
+to your universe of entities.
 
 .. code:: python
 
-    universe = [
-        "RavenPack",
-        {'ticker': 'AAPL'},
-        'California USA',
-        { # Amazon, specifying various fields
-            "client_id": "12345-A",
-            "date": "2017-01-01",
-            "name": "Amazon Inc.",
-            "entity_type": "COMP",
-            "isin": "US0231351067",
-            "cusip": "023135106",
-            "sedol": "B58WM62",
-            "listing": "XNAS:AMZN"
-        },
-
-    ]
-    mapping = api.get_entity_mapping(universe)
-
-    # in this case we match everything
-    assert len(mapping.matched) == len(universe)
-    assert [m.name for m in mapping.matched] == [
-        "RavenPack International S.L.",
-        "Apple Inc.",
-        "California, U.S.",
-        "Amazon.com Inc."
-    ]
+   universe = [
+       "RavenPack",
+       {'ticker': 'AAPL'},
+       'California USA',
+       { # Amazon, specifying various fields
+           "client_id": "12345-A",
+           "date": "2017-01-01",
+           "name": "Amazon Inc.",
+           "entity_type": "COMP",
+           "isin": "US0231351067",
+           "cusip": "023135106",
+           "sedol": "B58WM62",
+           "listing": "XNAS:AMZN"
+       },
+
+   ]
+   mapping = api.get_entity_mapping(universe)
+
+   # in this case we match everything
+   assert len(mapping.matched) == len(universe)
+   assert [m.name for m in mapping.matched] == [
+       "RavenPack International S.L.",
+       "Apple Inc.",
+       "California, U.S.",
+       "Amazon.com Inc."
+   ]
 
 Entity reference
 ~~~~~~~~~~~~~~~~
 
 The entity reference endpoint give you all the available information for
-an Entity given the RP_ENTITY_ID
+an Entity given the RP\_ENTITY\_ID
 
 .. code:: python
 
-    ALPHABET_RP_ENTITY_ID = '4A6F00'
+   ALPHABET_RP_ENTITY_ID = '4A6F00'
 
-    references = api.get_entity_reference(ALPHABET_RP_ENTITY_ID)
+   references = api.get_entity_reference(ALPHABET_RP_ENTITY_ID)
 
-    # show all the names over history
-    for name in references.names:
-        print(name.value, name.start, name.end)
+   # show all the names over history
+   for name in references.names:
+       print(name.value, name.start, name.end)
 
-    # print all the ticket valid today
-    for ticker in references.tickers:
-        if ticker.is_valid():
-            print(ticker)
+   # print all the ticket valid today
+   for ticker in references.tickers:
+       if ticker.is_valid():
+           print(ticker)
 
 Text Analytics
 ~~~~~~~~~~~~~~
@@ -213,15 +213,15 @@ internal proxy:
 
 .. code:: python
 
-    api = RPApi()
-    api.common_request_params.update(
-        dict(
-            proxies={'https': 'http://your_internal_proxy:9999'},
-            verify=False,
-        )
-    )
+   api = RPApi()
+   api.common_request_params.update(
+       dict(
+           proxies={'https': 'http://your_internal_proxy:9999'},
+           verify=False,
+       )
+   )
 
-    # use the api to do requests
+   # use the api to do requests
 
 PS. For setting your internal proxies, requests will honor the
-HTTPS_PROXY environment variable.
+HTTPS\_PROXY environment variable.

ravenpackapi/core.py

Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@
 from ravenpackapi.utils.dynamic_sessions import DynamicSession
 
 _VALID_METHODS = ('get', 'post', 'put', 'delete', 'patch')
-VERSION = '1.0.46'
+VERSION = '1.0.47'
 
 logger = logging.getLogger("ravenpack.core")
 

ravenpackapi/upload/models.py

Lines changed: 14 additions & 13 deletions

@@ -54,7 +54,8 @@ def get_metadata(self, force_refresh=False):
         if self.file_name and not force_refresh:  # we already have the file metadata
             pass
         else:
-            response = self.api.request('%s/files/%s/metadata' % (self.api._UPLOAD_BASE_URL, self.file_id))
+            response = retry_on_too_early(self.api.request,
+                                          '%s/files/%s/metadata' % (self.api._UPLOAD_BASE_URL, self.file_id))
             metadata = response.json()
             for field in FILE_FIELDS:
                 setattr(self, field, metadata.get(field))
@@ -66,7 +67,7 @@ def get_metadata(self, force_refresh=False):
     @api_method
     def save_original(self, filename):
         response = retry_on_too_early(self.api.request,
-                                      '%s/files/%s' % (self.api._UPLOAD_BASE_URL, self.file_id),
+                                      '%s/files/%s' % (self.api._UPLOAD_BASE_URL, self.file_id),
                                       stream=True)
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=self.api._CHUNK_SIZE):
@@ -76,11 +77,11 @@ def save_original(self, filename):
     def save_analytics(self, filename, output_format='application/json'):
         self.wait_for_completion()
         response = retry_on_too_early(self.api.request,
-                                      '%s/files/%s/analytics' % (self.api._UPLOAD_BASE_URL, self.file_id,),
+                                      '%s/files/%s/analytics' % (self.api._UPLOAD_BASE_URL, self.file_id,),
                                       headers=dict(
-                                          Accept=output_format,
-                                          **self.api.headers
-                                      ),
+                                          Accept=output_format,
+                                          **self.api.headers
+                                      ),
                                       stream=True)
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=self.api._CHUNK_SIZE):
@@ -90,11 +91,11 @@ def save_analytics(self, filename, output_format='application/json'):
     def get_analytics(self, output_format='application/json'):
         self.wait_for_completion()
         response = retry_on_too_early(self.api.request,
-                                      '%s/files/%s/analytics' % (self.api._UPLOAD_BASE_URL, self.file_id,),
+                                      '%s/files/%s/analytics' % (self.api._UPLOAD_BASE_URL, self.file_id,),
                                       headers=dict(
-                                          Accept=output_format,
-                                          **self.api.headers
-                                      ))
+                                          Accept=output_format,
+                                          **self.api.headers
+                                      ))
         if output_format == 'application/json':
             return response.json()
         else:
@@ -104,7 +105,7 @@ def get_analytics(self, output_format='application/json'):
     def save_annotated(self, filename):
         self.wait_for_completion()
         response = retry_on_too_early(self.api.request,
-                                      '%s/files/%s/annotated' % (self.api._UPLOAD_BASE_URL, self.file_id),
+                                      '%s/files/%s/annotated' % (self.api._UPLOAD_BASE_URL, self.file_id),
                                       stream=True)
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=self.api._CHUNK_SIZE):
@@ -114,14 +115,14 @@ def save_annotated(self, filename):
     def get_annotated(self):
         self.wait_for_completion()
         response = retry_on_too_early(self.api.request,
-                                      '%s/files/%s/annotated' % (self.api._UPLOAD_BASE_URL, self.file_id)
+                                      '%s/files/%s/annotated' % (self.api._UPLOAD_BASE_URL, self.file_id)
                                       )
         return response.text
 
     @api_method
     def delete(self):
         response = retry_on_too_early(self.api.request,
-                                      '%s/files/%s' % (self.api._UPLOAD_BASE_URL, self.file_id),
+                                      '%s/files/%s' % (self.api._UPLOAD_BASE_URL, self.file_id),
                                       method='delete'
                                       )
         return response
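The ravenpackapi/upload/models.py change routes the /metadata request through retry_on_too_early, the same helper the other file endpoints already use. That helper's implementation is not part of this commit, so the following is only a minimal sketch of what such a wrapper could look like — the HTTP 425 (Too Early) status code, the max_retries and delay parameters, and the linear backoff are all assumptions, not the library's actual API:

```python
import time

# Assumption: the server signals "not ready yet" with HTTP 425 Too Early
TOO_EARLY = 425


def retry_on_too_early(func, *args, max_retries=5, delay=1.0, **kwargs):
    """Call func(*args, **kwargs); retry with a growing pause while it answers 425."""
    response = None
    for attempt in range(max_retries):
        response = func(*args, **kwargs)
        if response.status_code != TOO_EARLY:
            break
        time.sleep(delay * (attempt + 1))  # wait a little longer each attempt
    return response
```

The pattern lets callers such as get_metadata or save_original pass the request callable and its arguments unchanged; consult the ravenpackapi source for the real helper's behaviour.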

setup.py

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 from setuptools import setup, find_packages
 
-VERSION = '1.0.46'
+VERSION = '1.0.47'
 
 with open('README.rst') as readme_file:
     readme = readme_file.read()
