Skip to content

Commit e3c99f5

Browse files
author
Dario Varotto
committed
Download datafile chunks: more chunk options
SPLIT_WEEKLY and SPLIT_DAILY
1 parent 3182165 commit e3c99f5

File tree

7 files changed

+79
-20
lines changed

7 files changed

+79
-20
lines changed

ravenpackapi/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from ravenpackapi.utils.constants import JSON_AVAILABLE_FIELDS
1515

1616
_VALID_METHODS = ('get', 'post', 'put', 'delete')
17-
VERSION = '1.0.18'
17+
VERSION = '1.0.19'
1818

1919
logger = logging.getLogger("ravenpack.core")
2020

ravenpackapi/examples/get_datafile_chunked.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,23 @@
88
import os
99

1010
from ravenpackapi import RPApi
11-
from ravenpackapi.util import time_intervals, SPLIT_YEARLY
11+
from ravenpackapi.util import time_intervals, SPLIT_WEEKLY
1212

1313
api = RPApi(api_key='YOUR_API_KEY')
1414
ds = api.get_dataset('YOUR_DATASET_ID')
1515

16-
start_date = '2000-01-01'
16+
start_date = '2018-01-01'
1717
end_date = '2018-01-10'
1818
GET_COMPRESSED = True
1919

2020
output_folder = './output'
2121

2222
os.makedirs(output_folder, exist_ok=True) # create folder for output
2323
for range_start, range_end in time_intervals(start_date, end_date,
24-
split=SPLIT_YEARLY,
25-
# another option is split=SPLIT_MONTHLY
26-
# (that is the default)
24+
split=SPLIT_WEEKLY,
25+
# available splits:
26+
# SPLIT_YEARLY, SPLIT_WEEKLY, SPLIT_DAILY
27+
# or SPLIT_MONTHLY (the default)
2728
):
2829
job = ds.request_datafile(
2930
start_date=range_start,
@@ -35,7 +36,7 @@
3536
continue
3637
filename = os.path.join(output_folder,
3738
"datafile-{datestr}.{ext}".format(
38-
datestr=range_start.strftime('%Y'),
39+
datestr=range_start.strftime('%Y-%m-%d'),
3940
ext='zip' if GET_COMPRESSED else 'csv')
4041
)
4142
print("Saving", range_start, "-", range_end, "=>", filename)

ravenpackapi/tests/test_date_intervals.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import datetime
22

3-
from ravenpackapi.util import time_intervals, SPLIT_YEARLY, SPLIT_MONTHLY
3+
import pytest
4+
5+
from ravenpackapi.util import time_intervals, SPLIT_YEARLY, SPLIT_MONTHLY, SPLIT_WEEKLY, SPLIT_DAILY
46

57

68
class TestTimeRanges(object):
@@ -66,12 +68,41 @@ def test_minutes_years(self):
6668
('2016-01-01 00:00', '2016-01-10 11:30'),
6769
]
6870

69-
def test_very_short_interval(self):
71+
def test_weekly(self):
72+
start = '2017-12-20 15:00'
73+
end = '2018-01-08 11:30'
74+
intervals = [tuple(map(lambda d: d.strftime("%Y-%m-%d %H:%M"), rng))
75+
for rng in time_intervals(start, end,
76+
split=SPLIT_WEEKLY)]
77+
assert intervals == [
78+
('2017-12-20 15:00', '2017-12-25 00:00'),
79+
('2017-12-25 00:00', '2018-01-01 00:00'),
80+
('2018-01-01 00:00', '2018-01-08 00:00'),
81+
('2018-01-08 00:00', '2018-01-08 11:30'),
82+
]
83+
84+
def test_daily(self):
85+
start = '2017-12-29 15:00'
86+
end = '2018-01-02 11:30'
87+
intervals = [tuple(map(lambda d: d.strftime("%Y-%m-%d %H:%M"), rng))
88+
for rng in time_intervals(start, end,
89+
split=SPLIT_DAILY)]
90+
assert intervals == [
91+
('2017-12-29 15:00', '2017-12-30 00:00'),
92+
('2017-12-30 00:00', '2017-12-31 00:00'),
93+
('2017-12-31 00:00', '2018-01-01 00:00'),
94+
('2018-01-01 00:00', '2018-01-02 00:00'),
95+
('2018-01-02 00:00', '2018-01-02 11:30'),
96+
]
97+
98+
@pytest.mark.parametrize("split", [SPLIT_MONTHLY, SPLIT_YEARLY,
99+
SPLIT_WEEKLY, SPLIT_DAILY])
100+
def test_very_short_interval(self, split):
70101
start = '2004-02-29 15:00'
71102
end = '2004-02-29 16:00'
72103
intervals = [tuple(map(lambda d: d.strftime("%Y-%m-%d %H:%M"), rng))
73104
for rng in time_intervals(start, end,
74-
split=SPLIT_YEARLY)]
105+
split=split)]
75106
assert intervals == [
76107
(start, end),
77108
]

ravenpackapi/util.py

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1-
import datetime
1+
from dateutil.relativedelta import relativedelta
2+
from dateutil.rrule import MO
23

34
from ravenpackapi.utils.date_formats import as_datetime
45

56
SPLIT_YEARLY = 'yearly'
67
SPLIT_MONTHLY = 'monthly'
8+
SPLIT_WEEKLY = 'weekly'
9+
SPLIT_DAILY = 'daily'
710

811

912
def parts_to_curl(method, endpoint, headers, data=None):
@@ -35,21 +38,43 @@ def to_curl(request):
3538

3639

3740
def time_intervals(date_start, date_end, split=SPLIT_MONTHLY):
38-
assert split in (SPLIT_MONTHLY, SPLIT_YEARLY)
41+
assert split in (
42+
SPLIT_YEARLY,
43+
SPLIT_MONTHLY,
44+
SPLIT_WEEKLY,
45+
SPLIT_DAILY)
3946
start = as_datetime(date_start)
4047
date_end = as_datetime(date_end)
4148

4249
def get_end(get_next_end):
43-
result = get_next_end
4450
if split == SPLIT_MONTHLY:
4551
# up to beginning of next month
46-
result = result.replace(day=1) + datetime.timedelta(days=32)
47-
return result.replace(day=1,
48-
hour=0, minute=0, second=0, microsecond=0)
52+
return get_next_end + \
53+
relativedelta(
54+
months=+1,
55+
day=1, hour=0, minute=0, second=0, microsecond=0
56+
)
4957
elif split == SPLIT_YEARLY:
5058
# up to beginning of next year
51-
return result.replace(result.year + 1, month=1, day=1,
52-
hour=0, minute=0, second=0, microsecond=0)
59+
return get_next_end + \
60+
relativedelta(
61+
years=+1,
62+
month=1, day=1, hour=0, minute=0, second=0, microsecond=0
63+
)
64+
elif split == SPLIT_WEEKLY:
65+
# will break the time on weeks starting on Mondays
66+
return get_next_end + \
67+
relativedelta(
68+
days=+1, weekday=MO,
69+
hour=0, minute=0, second=0, microsecond=0
70+
)
71+
elif split == SPLIT_DAILY:
72+
# will break the time on days, at midnight
73+
return get_next_end + \
74+
relativedelta(
75+
days=+1,
76+
hour=0, minute=0, second=0, microsecond=0
77+
)
5378

5479
while True:
5580
# some datetime trick to get the beginning of next month

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
requests[security]
22
future
3+
python-dateutil

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from setuptools import setup, find_packages
22

3-
VERSION = '1.0.18'
3+
VERSION = '1.0.19'
44

55
with open('README.rst') as readme_file:
66
readme = readme_file.read()
@@ -35,5 +35,5 @@
3535
],
3636

3737
keywords='python analytics api rest news data',
38-
install_requires=['requests[security]', 'future'],
38+
install_requires=['requests[security]', 'future', 'python-dateutil'],
3939
)

tox.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ deps =
66
pytest
77
pytest-xdist
88
future
9+
python-dateutil
910
requests
1011
commands = pytest -n 4
1112
passenv = RP_API_KEY RP_API_ENDPOINT

0 commit comments

Comments
 (0)