-
Notifications
You must be signed in to change notification settings - Fork 340
add exporting table files #120
Changes from 4 commits
319e032
d3c4e58
87ff77a
ff664a8
debf27c
57aa3b9
5ca5c98
c5e998b
7c35fd4
1c94ae3
f2e219f
63eb730
c713f0c
3d8ea55
c681509
87b1a14
4c2924e
1bbf98e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from quandl.errors.quandl_error import InvalidRequestError | ||
from .utils.api_key_util import ApiKeyUtil | ||
from .model.datatable import Datatable | ||
from .message import Message | ||
|
||
|
||
def bulkdownloadtable(datatable_code, **kwargs):
    """Download a complete datatable as a zip file.

    :param str datatable_code: Code of the datatable to fetch, such as MER/F1
    :param str filename: Destination passed on to
        ``Datatable.bulk_download_file``; defaults to ``'.'``
        (the current working directory)
    :param str api_key: Most databases require api_key for bulk download
    :raises InvalidRequestError: if the ``authtoken`` keyword is supplied
    :returns: the result of ``Datatable.bulk_download_file``
    """
    # discourage users from using authtoken
    uses_authtoken = 'authtoken' in kwargs
    if uses_authtoken:
        raise InvalidRequestError(Message.ERROR_AUTHTOKEN_NOT_SUPPORTED)

    ApiKeyUtil.init_api_key_from_args(kwargs)

    # Remove the destination from kwargs; what remains is forwarded as the
    # request params.
    destination = kwargs.pop('filename', '.')
    table = Datatable(datatable_code)
    return table.bulk_download_file(destination, params=kwargs)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,27 @@ | ||
try: | ||
from urllib.parse import urlencode | ||
from urllib.request import urlopen | ||
except ImportError: | ||
from urllib import urlencode | ||
from urllib import urlopen | ||
|
||
from time import sleep | ||
import os | ||
|
||
from quandl.api_config import ApiConfig | ||
from quandl.connection import Connection | ||
from quandl.util import Util | ||
from quandl.errors.quandl_error import QuandlError | ||
from quandl.operations.get import GetOperation | ||
from quandl.operations.list import ListOperation | ||
from quandl.util import Util | ||
|
||
from .model_base import ModelBase | ||
from quandl.message import Message | ||
from .data import Data | ||
|
||
|
||
class Datatable(GetOperation, ListOperation, ModelBase): | ||
BULK_CHUNK_SIZE = 16 * 1024 | ||
|
||
@classmethod | ||
def get_path(cls): | ||
|
@@ -14,3 +30,54 @@ def get_path(cls): | |
def data(self, **options):
    """Fetch data for this datatable through ``Data.page``.

    The keyword options are first run through ``Util.convert_options``
    and the converted result is forwarded to ``Data.page``.
    """
    converted = Util.convert_options(**options)
    page = Data.page(self, **converted)
    return page
|
||
def bulk_download_file(self, file_or_folder_path, **options):
    """Validate the destination and start the bulk download.

    :param str file_or_folder_path: where the download should be written
    :param options: keyword arguments forwarded to ``_url_request``
    :raises QuandlError: if ``file_or_folder_path`` is not a ``str``
    :returns: the result of ``_url_request``
    """
    if isinstance(file_or_folder_path, str):
        return self._url_request(file_or_folder_path, **options)

    raise QuandlError(Message.ERROR_FOLDER_ISSUE)
|
||
def _url_request(self, file_or_folder_path, **options):
    """Request a bulk export of this datatable and save it to disk.

    The export endpoint is polled until it reports the file status as
    ``'fresh'``; the file behind the returned link is then streamed to
    disk in ``BULK_CHUNK_SIZE`` chunks.

    :param str file_or_folder_path: target file path. If it is an existing
        directory, the file is written inside it as ``<code>.zip`` with
        every ``/`` in the datatable code replaced by ``_``.
    :param options: keyword arguments forwarded to ``Connection.request``.
        ``options['params']`` is created when absent and receives the
        configured ``api_key`` / ``api_version``.
    :returns: path of the file that was written
    """
    # Seconds to wait between polls while the export is being generated.
    retry_interval = 30

    params = options.setdefault('params', {})
    if ApiConfig.api_key:
        params['api_key'] = ApiConfig.api_key
    if ApiConfig.api_version:
        params['api_version'] = ApiConfig.api_version

    url = self._download_request_path()
    url += '.json?qopts.export=true&' + urlencode(params)

    # Poll iteratively. The previous recursive retry slept only *after* the
    # recursive call returned (so the API was hit with no delay) and dropped
    # the recursive call's return value.
    while True:
        r = Connection.request('get', url, **options)
        file_info = r.json()['datatable_bulk_download']['file']
        if file_info['status'] == 'fresh':
            break
        print(Message.LONG_GENERATION_TIME)
        sleep(retry_interval)

    file_path = file_or_folder_path
    if os.path.isdir(file_or_folder_path):
        file_name = self.code.replace('/', '_') + '.zip'
        file_path = os.path.join(file_or_folder_path, file_name)

    res = urlopen(file_info['link'])
    try:
        with open(file_path, 'wb') as fd:
            while True:
                chunk = res.read(self.BULK_CHUNK_SIZE)
                if not chunk:
                    break
                fd.write(chunk)
    finally:
        # Python 2's urllib.urlopen result is not a context manager, so the
        # response is closed explicitly.
        res.close()

    return file_path
|
||
def _download_request_path(self):
    """Build the request path for this datatable.

    Takes the result of ``default_path()`` and passes it, together with
    ``{'id': self.code}``, to ``Util.constructed_path``.
    """
    path_template = self.default_path()
    return Util.constructed_path(path_template, {'id': self.code})
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,8 @@ | |
from quandl.model.datatable import Datatable | ||
from mock import patch, call | ||
from test.factories.datatable import DatatableFactory | ||
from quandl.api_config import ApiConfig | ||
from quandl.errors.quandl_error import (InternalServerError, QuandlError) | ||
|
||
|
||
class GetDatatableDatasetTest(unittest.TestCase): | ||
|
@@ -48,3 +50,46 @@ def test_dataset_column_names_match_expected(self): | |
metadata = Datatable('ZACKS/FC').data_fields() | ||
six.assertCountEqual(self, | ||
metadata, [u'datatable_code', u'id', u'name', u'vendor_code']) | ||
|
||
|
||
class BulkDownloadDataTableTest(unittest.TestCase):
    """Checks the error paths of ``Datatable.bulk_download_file``."""

    @classmethod
    def setUpClass(cls):
        # Review thread on the original line wrapping of the first
        # register_uri call:
        #   "Is there any reason for such formatting?"
        #   "not sure, I was looking at the old tests..."
        httpretty.enable()

        datatables_api = re.compile('https://www.quandl.com/api/v3/datatables/*')
        redirect_headers = {'Location': 'https://www.blah.com/download/db.zip'}
        httpretty.register_uri(httpretty.GET,
                               datatables_api,
                               adding_headers=redirect_headers,
                               body='{}',
                               status=302)

        download_host = re.compile('https://www.blah.com/')
        httpretty.register_uri(httpretty.GET, download_host, body='{}')

    @classmethod
    def tearDownClass(cls):
        httpretty.disable()
        httpretty.reset()

    def setUp(self):
        # Build one ZACKS/FC datatable and set the API credentials that the
        # download code reads from ApiConfig.
        attributes = DatatableFactory.build(vendor_code='ZACKS', datatable_code='FC')
        self.datatable = Datatable(attributes['datatable_code'], attributes)
        ApiConfig.api_key = 'api_token'
        ApiConfig.api_version = '2015-04-09'

    def test_bulk_download_raises_exception_when_no_path(self):
        with self.assertRaises(QuandlError):
            self.datatable.bulk_download_file(None)

    def test_bulk_download_table_raises_exception_when_error_response(self):
        error_body = json.dumps(
            {'quandl_error': {'code': 'QEMx01', 'message': 'something went wrong'}})
        httpretty.register_uri(httpretty.GET,
                               re.compile('https://www.quandl.com/api/v3/datatables/*'),
                               body=error_body,
                               status=500)
        with self.assertRaises(InternalServerError):
            self.datatable.bulk_download_file('.')
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function should be
`bulk_download_table`
to follow Python naming convention. Although now I see there's already a
`bulkdownload`
function - maybe it would make sense to keep this name the way it is now for consistency.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah i was following the
bulkdownload
for timeseries data