Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
poetry-version: ["1.7.1"]
poetry-version: ["1.8.2"]
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}

Expand Down
41 changes: 19 additions & 22 deletions cmr/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,38 +44,35 @@ def get(self, limit=2000):
:returns: query results as a list
"""

page_size = min(limit, 2000)
url = self._build_url()

results = []
headers = self.headers.copy() if self.headers else {}
more_results = True
n_results = 0

while more_results:
# Only get what we need
page_size = min(limit - len(results), page_size)
response = get(url, headers=self.headers, params={'page_size': page_size})
if self.headers is None:
self.headers = {}
self.headers['cmr-search-after'] = response.headers.get('cmr-search-after')

try:
response.raise_for_status()
except exceptions.HTTPError as ex:
raise RuntimeError(ex.response.text)
# Only get what we need on the last page.
page_size = min(limit - n_results, 2000)
response = get(url, headers=headers, params={"page_size": page_size})
response.raise_for_status()

if self._format == "json":
latest = response.json()['feed']['entry']
else:
latest = [response.text]
# Explicitly track the number of results we have because the length
# of the results list will only match the number of entries fetched
# when the format is JSON. Otherwise, the length of the results
# list is the number of *pages* fetched, not the number of *items*.
n_results += page_size

results.extend(latest)
results.extend(
response.json()["feed"]["entry"]
if self._format == "json"
else [response.text]
)

if page_size > len(response.json()['feed']['entry']) or len(results) >= limit:
more_results = False
if cmr_search_after := response.headers.get("cmr-search-after"):
headers["cmr-search-after"] = cmr_search_after

# This header is transient. We need to get rid of it before we do another different query
if self.headers['cmr-search-after']:
del self.headers['cmr-search-after']
more_results = n_results < limit and cmr_search_after is not None

return results

Expand Down
Loading