Skip to content

Commit 2d54c8f

Browse files
committed
v1.0.36 - Text-Analytics updates
1 parent 010b1c7 commit 2d54c8f

File tree

7 files changed

+146
-32
lines changed

7 files changed

+146
-32
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
# Changelog
22

3+
## v1.0.36 (2020-05-14)
4+
* Text-Analytics API endpoint updated: folders & richer metadata
5+
* Extended error handling to support Feed disconnection problems
6+
37
## v1.0.35 (2020-02-22)
4-
Initial support for the Upload-API endpoints
8+
Initial support for the Text-Analytics API endpoints
59

610
## v1.0.34 (2019-11-11)
711
Retrieve a lazy-loaded dataset when setting one of its parameters.

ravenpackapi/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
from ravenpackapi.utils.constants import ENTITY_TYPES
1515
from ravenpackapi.utils.date_formats import as_datetime_str
1616

17-
_VALID_METHODS = ('get', 'post', 'put', 'delete')
18-
VERSION = '1.0.35'
17+
_VALID_METHODS = ('get', 'post', 'put', 'delete', 'patch')
18+
VERSION = '1.0.36'
1919

2020
logger = logging.getLogger("ravenpack.core")
2121

ravenpackapi/examples/text_extraction.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@
99

1010
# upload a file to access the analytics
1111
f = api.upload.file("_orig.doc",
12-
# properties={"primary_entity": "Ravenpack"}
12+
# properties={"primary_entity": "RavenPack"}
1313
)
14+
f.wait_for_completion()
15+
1416
# we can also get it if we know the id
1517
# f = api.upload.get('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
1618

17-
f.wait_for_completion()
1819
# get back the analytics found in the document
1920
f.save_analytics("_analytics.json")
2021

@@ -25,7 +26,7 @@
2526
f.save_original("_orig.doc")
2627

2728
# given a file we can set tags
28-
# f.set_tags(['file tag'])
29+
# f.set_metadata(tags=['file tag'])
2930

3031
# ... or delete it
3132
# f.delete()

ravenpackapi/upload/models.py

Lines changed: 93 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,63 @@
1-
import json
21
from time import sleep
32

43
from ravenpackapi.exceptions import api_method
54

5+
FILE_FIELDS = (
6+
'file_id', 'file_name', 'folder_id',
7+
'status',
8+
'upload_ts', 'raw_size', 'starred', 'trashed',
9+
'tags',
10+
)
11+
12+
FOLDER_FIELDS = (
13+
'folder_id', 'parent_folder_id',
14+
'folder_name',
15+
'starred', 'trashed',
16+
)
17+
618

719
class File(object):
820
""" A promise to get a file """
921

22+
# check FILE_FIELDS for the supported fields
1023
def __init__(self, file_id,
24+
file_name=None, folder_id=None,
1125
status=None,
12-
name=None,
26+
upload_ts=None, raw_size=None, starred=None, trashed=None,
27+
tags=None,
1328
api=None,
1429
):
30+
self.api = api
1531
self.file_id = file_id
32+
self.folder_id = folder_id
33+
self.tags = tags or []
1634
self.status = status
17-
self.name = name
18-
self.api = api
35+
36+
self.upload_ts = upload_ts
37+
self.starred = starred
38+
self.trashed = trashed
39+
self.raw_size = raw_size
40+
self.file_name = file_name
1941

2042
def __str__(self):
21-
return "File: %(file_id)s - %(name)s - status: %(status)s" % self.__dict__
43+
self.get_metadata() # be sure to have the metadata (this is called only once per file)
44+
return "File: %(file_id)s - %(file_name)s - status: %(status)s" % self.__dict__
2245

2346
@api_method
2447
def get_status(self):
2548
response = self.api.request('%s/files/%s/status' % (self.api._UPLOAD_BASE_URL, self.file_id))
2649
self.status = response.json()['status']
2750
return self.status
2851

52+
@api_method
53+
def get_metadata(self, force_refresh=False):
54+
if self.file_name and not force_refresh: # we already have the file metadata
55+
return
56+
response = self.api.request('%s/files/%s/metadata' % (self.api._UPLOAD_BASE_URL, self.file_id))
57+
metadata = response.json()
58+
for field in FILE_FIELDS:
59+
setattr(self, field, metadata.get(field))
60+
2961
@api_method
3062
def save_original(self, filename):
3163
response = self.api.request('%s/files/%s' % (self.api._UPLOAD_BASE_URL, self.file_id),
@@ -61,12 +93,64 @@ def delete(self):
6193
return response
6294

6395
@api_method
64-
def set_tags(self, tags):
65-
self.api.request('%s/files/%s/tags' % (self.api._UPLOAD_BASE_URL, self.file_id),
66-
data=json.dumps(tags),
67-
method='put')
96+
def set_metadata(self, file_name=None,
97+
folder_id=None,
98+
trashed=None, starred=None,
99+
tags=None
100+
):
101+
metadata = {k: v
102+
for k, v in dict(file_name=file_name, folder_id=folder_id,
103+
trashed=trashed, starred=starred, tags=tags,
104+
).items()
105+
if v is not None}
106+
self.api.request('%s/files/%s/metadata' % (self.api._UPLOAD_BASE_URL, self.file_id),
107+
json=metadata,
108+
method='patch')
68109

69110
def wait_for_completion(self):
70111
while self.status not in {"COMPLETED", "DELETED"}:
71112
sleep(1)
72113
self.get_status()
114+
115+
116+
class Folder(object):
117+
""" A Folder containing files """
118+
119+
def __init__(self, folder_id,
120+
folder_name=None,
121+
parent_folder_id=None,
122+
starred=None, trashed=None,
123+
api=None,
124+
):
125+
self.api = api
126+
self.folder_id = folder_id
127+
self.parent_folder_id = parent_folder_id
128+
self.folder_name = folder_name
129+
130+
self.starred = starred
131+
self.trashed = trashed
132+
133+
def __str__(self):
134+
return "Folder: %(folder_id)s - %(folder_name)s" % self.__dict__
135+
136+
@api_method
137+
def delete(self):
138+
response = self.api.request('%s/folder/%s' % (self.api._UPLOAD_BASE_URL, self.folder_id),
139+
method='delete')
140+
return response
141+
142+
@api_method
143+
def set_metadata(self,
144+
folder_name=None,
145+
parent_folder_id=None,
146+
trashed=None, starred=None,
147+
):
148+
metadata = {k: v
149+
for k, v in dict(folder_name=folder_name,
150+
parent_folder_id=parent_folder_id,
151+
trashed=trashed, starred=starred,
152+
).items()
153+
if v is not None}
154+
self.api.request('%s/folders/%s' % (self.api._UPLOAD_BASE_URL, self.folder_id),
155+
json=metadata,
156+
method='patch')

ravenpackapi/upload/module.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import json
21
import os
32

4-
from ravenpackapi.upload.models import File
3+
from ravenpackapi.upload.models import (File, FILE_FIELDS,
4+
Folder, FOLDER_FIELDS)
55
from ravenpackapi.utils.date_formats import as_datetime_str
66

77

@@ -15,6 +15,7 @@ def list(self,
1515
tags=None,
1616
status=None,
1717
filename=None,
18+
page_size=50,
1819
):
1920
params = dict(
2021
start_date=as_datetime_str(start_date),
@@ -25,23 +26,28 @@ def list(self,
2526
)
2627

2728
# the list of files is split into pages - let's collect them
29+
get_next, offset = False, 0
2830
while True:
2931
response = self.api.request('%s/files' % self.api._UPLOAD_BASE_URL,
3032
params={k: v
3133
for k, v in params.items()
3234
if v is not None})
3335
data = response.json()
3436
if 'results' in data:
35-
for r in data['results']:
36-
yield File(r['file_id'],
37-
status=r.get('status'),
38-
name=r.get('name'),
39-
api=self.api,
40-
)
41-
if data.get('next_page_key'): # next page
42-
params['next_page_key'] = json.dumps(data['next_page_key'])
43-
else:
37+
results = data['results']
38+
for r in results:
39+
get_next = len(results) == page_size
40+
file_params = {
41+
field: r.get(field) for field in FILE_FIELDS
42+
}
43+
yield File(
44+
api=self.api,
45+
**file_params
46+
)
47+
if not get_next:
4448
break
49+
offset += page_size
50+
params['offset'] = offset
4551

4652
def file(self, name_or_file_handler, properties=None):
4753
""" Upload a file - file can be either a filename or a file handler """
@@ -53,10 +59,10 @@ def file(self, name_or_file_handler, properties=None):
5359
else:
5460
filepath = name_or_file_handler.name
5561
fh = name_or_file_handler
56-
filename = os.path.basename(filepath)
62+
file_name = os.path.basename(filepath)
5763

5864
params = dict(
59-
filename=filename,
65+
filename=file_name,
6066
properties=properties
6167
)
6268

@@ -85,7 +91,19 @@ def file(self, name_or_file_handler, properties=None):
8591
fh.close()
8692
return File(file_id,
8793
api=self.api,
88-
name=filename)
94+
file_name=file_name)
8995

9096
def get(self, file_id):
9197
return File(file_id, api=self.api)
98+
99+
def list_folders(self):
100+
response = self.api.request('%s/folders' % self.api._UPLOAD_BASE_URL)
101+
data = response.json()
102+
for r in data:
103+
folder_params = {
104+
field: r.get(field) for field in FOLDER_FIELDS
105+
}
106+
yield Folder(**folder_params)
107+
108+
def folder_get(self, folder_id):
109+
return Folder(folder_id, api=self.api)

ravenpackapi/util.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import csv
2+
import logging
23
import sys
34

45
import six
@@ -11,6 +12,7 @@
1112
SPLIT_MONTHLY = 'monthly'
1213
SPLIT_WEEKLY = 'weekly'
1314
SPLIT_DAILY = 'daily'
15+
logger = logging.getLogger("ravenpack.util")
1416

1517

1618
def parts_to_curl(method, endpoint, headers, data=None):
@@ -37,10 +39,15 @@ def parts_to_curl(method, endpoint, headers, data=None):
3739
def to_curl(request):
3840
if not request:
3941
return 'No request'
42+
try:
43+
data = request.body.decode() if getattr(request, 'body') else None
44+
except Exception as e:
45+
logger.debug("Cannot convert data to curl: %s" % e)
46+
data = "?"
4047
return parts_to_curl(request.method,
4148
request.url,
4249
request.headers,
43-
request.body if getattr(request, 'body') else None)
50+
data=data)
4451

4552

4653
def time_intervals(date_start, date_end, split=SPLIT_MONTHLY):

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from setuptools import setup, find_packages
22

3-
VERSION = '1.0.35'
3+
VERSION = '1.0.36'
44

55
with open('README.rst') as readme_file:
66
readme = readme_file.read()
@@ -24,7 +24,7 @@
2424
# 3 - Alpha
2525
# 4 - Beta
2626
# 5 - Production/Stable
27-
'Development Status :: 5 - Production',
27+
'Development Status :: 5 - Production/Stable',
2828

2929
# Indicate who your project is intended for
3030
'Intended Audience :: Developers',

0 commit comments

Comments
 (0)