Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix kwargs propagation in interfaces module #280

Merged
merged 18 commits into from
Mar 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 61 additions & 21 deletions erddapy/core/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
This module takes an URL or the bytes response of a request and converts it to Pandas,
XArray, Iris, etc. objects.
"""
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict, Optional

import pandas as pd

Expand All @@ -16,45 +16,85 @@
from netCDF4 import Dataset


def to_pandas(
    url: str,
    requests_kwargs: Optional[Dict] = None,
    pandas_kwargs: Optional[Dict] = None,
) -> "pd.DataFrame":
    """
    Convert a URL to a Pandas DataFrame.

    url: URL to request data from.
    requests_kwargs: arguments to be passed to the urlopen method.
    pandas_kwargs: kwargs to be passed to third-party library (pandas.read_csv).
    """
    # urlopen takes the whole requests_kwargs dict as a single argument;
    # do NOT unpack it with ** (urlopen(url, **kwargs) would raise TypeError).
    data = urlopen(url, requests_kwargs)
    try:
        return pd.read_csv(data, **(pandas_kwargs or {}))
    except Exception as e:
        # Re-raise with the offending URL so the failure is actionable.
        raise ValueError(f"Could not read url {url} with Pandas.read_csv.") from e


def to_ncCF(
    url: str,
    protocol: str = None,
    requests_kwargs: Optional[Dict] = None,
) -> "Dataset":
    """
    Convert a URL to a netCDF4 Dataset.

    url: URL to request data from.
    protocol: 'griddap' or 'tabledap'.
    requests_kwargs: arguments to be passed to the urlopen method (including auth).

    Raises ValueError when called with the griddap protocol, which does not
    support the .ncCF response.
    """
    if protocol == "griddap":
        raise ValueError(
            f"Cannot use .ncCF with griddap protocol. The URL you tried to access is: '{url}'.",
        )
    return _nc_dataset(url, requests_kwargs)


def to_xarray(
    url: str,
    response="opendap",
    requests_kwargs: Optional[Dict] = None,
    xarray_kwargs: Optional[Dict] = None,
) -> "xr.Dataset":
    """
    Convert a URL to an xarray dataset.

    url: URL to request data from.
    response: type of response to be requested from the server.
    requests_kwargs: arguments to be passed to the urlopen method.
    xarray_kwargs: kwargs to be passed to third-party library (xarray).
    """
    # Deferred import: xarray is an optional dependency.
    import xarray as xr

    if response == "opendap":
        # xarray opens OPeNDAP endpoints directly; requests_kwargs is not
        # used on this path (no intermediate download is performed).
        return xr.open_dataset(url, **(xarray_kwargs or {}))
    else:
        # Download into an in-memory netCDF4 Dataset first, then wrap it.
        nc = _nc_dataset(url, requests_kwargs)
        return xr.open_dataset(
            xr.backends.NetCDF4DataStore(nc), **(xarray_kwargs or {})
        )


def to_iris(
    url: str,
    requests_kwargs: Optional[Dict] = None,
    iris_kwargs: Optional[Dict] = None,
):
    """
    Convert a URL to an iris CubeList.

    url: URL to request data from.
    requests_kwargs: arguments to be passed to the urlopen method.
    iris_kwargs: kwargs to be passed to third-party library (iris.load_raw).
    """
    # Deferred import: iris is an optional dependency.
    import iris

    # urlopen takes the whole requests_kwargs dict as a single argument;
    # do NOT unpack it with ** (urlopen(url, **kwargs) would raise TypeError).
    data = urlopen(url, requests_kwargs)
    with _tempnc(data) as tmp:
        cubes = iris.load_raw(tmp, **(iris_kwargs or {}))
        # Realize the data while the temporary file still exists; iris loads
        # lazily and the backing file is removed when the context exits.
        _ = [cube.data for cube in cubes]
        return cubes
6 changes: 3 additions & 3 deletions erddapy/core/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

from contextlib import contextmanager
from pathlib import Path
from typing import BinaryIO, Dict, Generator
from typing import BinaryIO, Dict, Generator, Optional
from urllib.parse import urlparse

from erddapy.core.url import urlopen


def _nc_dataset(url, auth, **requests_kwargs: Dict):
def _nc_dataset(url, requests_kwargs: Optional[Dict] = None):
"""Return a netCDF4-python Dataset from memory and fallbacks to disk if that fails."""
from netCDF4 import Dataset

data = urlopen(url=url, auth=auth, **requests_kwargs)
data = urlopen(url, requests_kwargs)
try:
return Dataset(Path(urlparse(url).path).name, memory=data.read())
except OSError:
Expand Down
9 changes: 7 additions & 2 deletions erddapy/core/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,19 @@ def _urlopen(url: str, auth: Optional[tuple] = None, **kwargs: Dict) -> BinaryIO
return io.BytesIO(response.content)


def urlopen(
    url: str,
    requests_kwargs: Optional[Dict] = None,
) -> BinaryIO:
    """Thin wrapper around httpx get content.

    url: URL to request data from.
    requests_kwargs: keyword arguments forwarded to the underlying _urlopen
        (see httpx.get docs for the `params` and other options).

    Returns a seekable BinaryIO positioned at the start of the response body.
    """
    if requests_kwargs is None:
        requests_kwargs = {}
    # Ignoring type checks here b/c mypy does not support decorated functions.
    data = _urlopen(url, **requests_kwargs)  # type: ignore
    # Rewind so callers (pandas, netCDF4, iris) can read from the beginning.
    data.seek(0)
    return data

Expand Down
13 changes: 7 additions & 6 deletions erddapy/erddapy.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def get_download_url(
def to_pandas(self, **kw):
"""Save a data request to a pandas.DataFrame.

Accepts any `pandas.read_csv` keyword arguments.
Accepts any `pandas.read_csv` keyword arguments, passed as a dictionary to pandas_kwargs.

This method uses the .csvp [1] response as the default for simplicity,
please check ERDDAP's documentation for the other csv options available.
Expand All @@ -344,13 +344,13 @@ def to_pandas(self, **kw):
"""
response = kw.pop("response", "csvp")
url = self.get_download_url(response=response, **kw)
return to_pandas(url, **kw)
return to_pandas(url, pandas_kwargs=dict(**kw))

def to_ncCF(self, protocol: str = None, **kw):
    """Load the data request into a Climate and Forecast compliant netCDF4-python object.

    protocol: 'griddap' or 'tabledap'; defaults to the instance's protocol.
    kw: forwarded to get_download_url and, as requests_kwargs, to to_ncCF.
    """
    protocol = protocol if protocol else self.protocol
    url = self.get_download_url(response="ncCF", **kw)
    # NOTE(review): **kw is forwarded both to get_download_url above and as
    # requests_kwargs here — confirm both consumers accept the same keys.
    return to_ncCF(url, protocol=protocol, requests_kwargs=dict(**kw))

def to_xarray(self, **kw):
"""Load the data request into a xarray.Dataset.
Expand All @@ -364,7 +364,8 @@ def to_xarray(self, **kw):
else:
response = "ncCF"
url = self.get_download_url(response=response)
return to_xarray(url, response=response, auth=self.auth, **kw)
requests_kwargs = dict(auth=self.auth)
return to_xarray(url, response, requests_kwargs, xarray_kwargs=dict(**kw))

def to_iris(self, **kw):
"""Load the data request into an iris.CubeList.
Expand All @@ -373,7 +374,7 @@ def to_iris(self, **kw):
"""
response = "nc" if self.protocol == "griddap" else "ncCF"
url = self.get_download_url(response=response, **kw)
return to_iris(url, **kw)
return to_iris(url, iris_kwargs=dict(**kw))

@functools.lru_cache(maxsize=None)
def _get_variables(self, dataset_id: OptionalStr = None) -> Dict:
Expand All @@ -386,7 +387,7 @@ def _get_variables(self, dataset_id: OptionalStr = None) -> Dict:
url = self.get_info_url(dataset_id=dataset_id, response="csv")

variables = {}
data = urlopen(url, auth=self.auth, **self.requests_kwargs)
data = urlopen(url, self.requests_kwargs)
_df = pd.read_csv(data)
self._dataset_id = dataset_id
for variable in set(_df["Variable Name"]):
Expand Down
5 changes: 3 additions & 2 deletions notebooks/00-quick_intro.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
"]\n",
"\n",
"e.constraints = {\n",
" \"time>=\": \"now-7days\",\n",
" \"time>=\": \"2000-01-01\",\n",
"}\n",
"\n",
"\n",
Expand Down Expand Up @@ -179,6 +179,7 @@
" parse_dates=True,\n",
").dropna()\n",
"\n",
"\n",
"df.head()"
]
}
Expand All @@ -196,7 +197,7 @@
},
"gist_id": "3f0f25b13ade0c64c84607bd92903d1b",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.10.4 ('erddapy')",
"language": "python",
"name": "python3"
},
Expand Down
4 changes: 2 additions & 2 deletions notebooks/01a-griddap.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@
"source": [
"def bounds2contraints(bounds):\n",
" return {\n",
" \"longitude>=\": bounds.minx.squeeze(),\n",
" \"longitude<=\": bounds.maxx.squeeze(),\n",
" \"longitude>=\": 360 - abs(bounds.minx.squeeze()), # convert longitude to 360 format\n",
" \"longitude<=\": 360 - abs(bounds.maxx.squeeze()),\n",
" \"latitude>=\": bounds.miny.squeeze(),\n",
" \"latitude<=\": bounds.maxy.squeeze(),\n",
" }\n",
Expand Down
3 changes: 1 addition & 2 deletions tests/test_netcdf_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ def test__nc_dataset_in_memory_https():
from netCDF4 import Dataset

url = "http://erddap.ioos.us/erddap/tabledap/allDatasets.nc" # noqa
auth = None
_nc = _nc_dataset(url, auth)
_nc = _nc_dataset(url)
assert isinstance(_nc, Dataset)
assert _nc.filepath() == url.split("/")[-1]

Expand Down
5 changes: 4 additions & 1 deletion tests/test_to_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,10 @@ def test_to_pandas(dataset_tabledap):
"""Test converting tabledap to a pandas DataFrame."""
import pandas as pd

df = dataset_tabledap.to_pandas(index_col="time (UTC)", parse_dates=True).dropna()
df = dataset_tabledap.to_pandas(
index_col="time (UTC)",
parse_dates=True,
).dropna()

assert isinstance(df, pd.DataFrame)
assert df.index.name == "time (UTC)"
Expand Down