Skip to content

Gaia 20182 Increase read chunksize on iterlines() and remove chunksize from stream_data_series() #377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions groclient/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,14 +654,12 @@ def get_data_series(self, **selection):
"""
return lib.get_data_series(self.access_token, self.api_host, **selection)

def stream_data_series(self, chunk_size=10000, **selection):
def stream_data_series(self, **selection):
"""Retrieve available data series for the given selections.
Similar to :meth:`~.get_data_series`, but API will stream data in chunk of given size
Similar to :meth:`~.get_data_series`, but the API streams the results in chunks of 10k data series

Parameters
----------
chunk_size : integer, optional
Number of data series to be returned in each chunk. Defaults to 10000
metric_id : integer, optional
item_id : integer, optional
region_id : integer, optional
Expand All @@ -685,9 +683,7 @@ def stream_data_series(self, chunk_size=10000, **selection):
}, { ... }, ... ]

"""
return lib.stream_data_series(
self.access_token, self.api_host, chunk_size, **selection
)
return lib.stream_data_series(self.access_token, self.api_host, **selection)

def search(self, entity_type, search_terms):
"""Search for the given search term. Better matches appear first.
Expand Down
2 changes: 2 additions & 0 deletions groclient/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,5 @@
"start_date",
"end_date",
]

ITR_CHUNK_READ_SIZE = 4096 * 1024 # 4 MB
14 changes: 9 additions & 5 deletions groclient/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
from builtins import str
from groclient import cfg
from collections import OrderedDict
from groclient.constants import REGION_LEVELS, DATA_SERIES_UNIQUE_TYPES_ID
from groclient.constants import (
REGION_LEVELS,
DATA_SERIES_UNIQUE_TYPES_ID,
ITR_CHUNK_READ_SIZE,
)
import groclient.utils
import json
import logging
Expand Down Expand Up @@ -429,16 +433,16 @@ def get_data_series(access_token, api_host, **selection):
raise Exception(resp.text)


def stream_data_series(access_token, api_host, chunk_size=None, **selection):
def stream_data_series(access_token, api_host, **selection):
logger = get_default_logger()
url = "/".join(["https:", "", api_host, "v2/stream/data_series/list"])
headers = {"authorization": "Bearer " + access_token}
params = get_params_from_selection(**selection)
if type(chunk_size) == int and chunk_size > 1:
params["chunkSize"] = chunk_size
resp = get_data(url, headers, params, logger, True)
try:
for line in resp.iter_lines(decode_unicode=True):
for line in resp.iter_lines(
chunk_size=ITR_CHUNK_READ_SIZE, decode_unicode=True
):
if line:
current_ds_list = json.loads(line)
if any(
Expand Down