Commit

Merge pull request #280 from vinisalazar/fix-interfaces-kwargs
Fix kwargs propagation in interfaces module
ocefpaf authored Mar 10, 2023
2 parents eab6fa7 + 8322e67 commit c97cb91
Showing 8 changed files with 88 additions and 39 deletions.
82 changes: 61 additions & 21 deletions erddapy/core/interfaces.py
@@ -4,7 +4,7 @@
This module takes an URL or the bytes response of a request and converts it to Pandas,
XArray, Iris, etc. objects.
"""
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict, Optional

import pandas as pd

@@ -16,45 +16,85 @@
from netCDF4 import Dataset


def to_pandas(url: str, requests_kwargs=None, **kw) -> "pd.DataFrame":
"""Convert a URL to Pandas DataFrame."""
if requests_kwargs is None:
requests_kwargs = {}
data = urlopen(url, **requests_kwargs)
def to_pandas(
url: str,
requests_kwargs: Optional[Dict] = None,
pandas_kwargs: Optional[Dict] = None,
) -> "pd.DataFrame":
"""
Convert a URL to Pandas DataFrame.
url: URL to request data from.
requests_kwargs: arguments to be passed to urlopen method.
**pandas_kwargs: kwargs to be passed to third-party library (pandas).
"""
data = urlopen(url, **(requests_kwargs or {}))
try:
return pd.read_csv(data, **kw)
return pd.read_csv(data, **(pandas_kwargs or {}))
except Exception as e:
raise ValueError(f"Could not read url {url} with Pandas.read_csv.") from e


def to_ncCF(url: str, protocol: str = None, **kw) -> "Dataset":
"""Convert a URL to a netCDF4 Dataset."""
def to_ncCF(
url: str,
protocol: str = None,
requests_kwargs: Optional[Dict] = None,
) -> "Dataset":
"""
Convert a URL to a netCDF4 Dataset.
url: URL to request data from.
protocol: 'griddap' or 'tabledap'.
requests_kwargs: arguments to be passed to urlopen method (including auth).
"""
if protocol == "griddap":
raise ValueError(
f"Cannot use .ncCF with griddap protocol. The URL you tried to access is: '{url}'.",
)
auth = kw.pop("auth", None)
return _nc_dataset(url, auth=auth, **kw)
return _nc_dataset(url, requests_kwargs)


def to_xarray(url: str, response="opendap", **kw) -> "xr.Dataset":
"""Convert a URL to an xarray dataset."""
def to_xarray(
url: str,
response="opendap",
requests_kwargs: Optional[Dict] = None,
xarray_kwargs: Optional[Dict] = None,
) -> "xr.Dataset":
"""
Convert a URL to an xarray dataset.
url: URL to request data from.
response: type of response to be requested from the server.
requests_kwargs: arguments to be passed to urlopen method.
xarray_kwargs: kwargs to be passed to third-party library (xarray).
"""
import xarray as xr

auth = kw.pop("auth", None)
if response == "opendap":
return xr.open_dataset(url, **kw)
return xr.open_dataset(url, **(xarray_kwargs or {}))
else:
nc = _nc_dataset(url, auth=auth, **kw)
return xr.open_dataset(xr.backends.NetCDF4DataStore(nc), **kw)
nc = _nc_dataset(url, requests_kwargs)
return xr.open_dataset(
xr.backends.NetCDF4DataStore(nc), **(xarray_kwargs or {})
)


def to_iris(url: str, **kw):
"""Convert a URL to an iris CubeList."""
def to_iris(
url: str,
requests_kwargs: Optional[Dict] = None,
iris_kwargs: Optional[Dict] = None,
):
"""
Convert a URL to an iris CubeList.
url: URL to request data from.
requests_kwargs: arguments to be passed to urlopen method.
iris_kwargs: kwargs to be passed to third-party library (iris).
"""
import iris

data = urlopen(url, **kw)
data = urlopen(url, **(requests_kwargs or {}))
with _tempnc(data) as tmp:
cubes = iris.load_raw(tmp, **kw)
cubes = iris.load_raw(tmp, **(iris_kwargs or {}))
_ = [cube.data for cube in cubes]
return cubes
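The practical effect of this file's changes: request options (auth, timeout, etc.) and reader options no longer share a single **kw bag but travel in separate dictionaries. A minimal sketch of the new calling convention, with a placeholder URL and illustrative options:

from erddapy.core.interfaces import to_pandas

# hypothetical tabledap .csvp URL; any reachable ERDDAP endpoint would do
url = "https://example.com/erddap/tabledap/someDataset.csvp"

df = to_pandas(
    url,
    requests_kwargs={"timeout": 60},       # forwarded to urlopen/httpx
    pandas_kwargs={"parse_dates": True},   # forwarded to pandas.read_csv
)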
6 changes: 3 additions & 3 deletions erddapy/core/netcdf.py
@@ -2,17 +2,17 @@

from contextlib import contextmanager
from pathlib import Path
from typing import BinaryIO, Dict, Generator
from typing import BinaryIO, Dict, Generator, Optional
from urllib.parse import urlparse

from erddapy.core.url import urlopen


def _nc_dataset(url, auth, **requests_kwargs: Dict):
def _nc_dataset(url, requests_kwargs: Optional[Dict] = None):
"""Return a netCDF4-python Dataset from memory and fallbacks to disk if that fails."""
from netCDF4 import Dataset

data = urlopen(url=url, auth=auth, **requests_kwargs)
data = urlopen(url, requests_kwargs)
try:
return Dataset(Path(urlparse(url).path).name, memory=data.read())
except OSError:
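The private helper follows the same convention: auth is no longer its own parameter but just another key in requests_kwargs. A sketch with a placeholder URL and credentials:

from erddapy.core.netcdf import _nc_dataset

nc = _nc_dataset(
    "https://example.com/erddap/tabledap/allDatasets.nc",  # placeholder URL
    requests_kwargs={"auth": ("user", "secret")},
)
print(nc.variables.keys())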
9 changes: 7 additions & 2 deletions erddapy/core/url.py
@@ -25,14 +25,19 @@ def _urlopen(url: str, auth: Optional[tuple] = None, **kwargs: Dict) -> BinaryIO
return io.BytesIO(response.content)


def urlopen(url: str, auth: Optional[tuple] = None, **kwargs: Dict) -> BinaryIO:
def urlopen(
url: str,
requests_kwargs: Optional[Dict] = None,
) -> BinaryIO:
"""Thin wrapper around httpx get content.
See httpx.get docs for the `params` and `kwargs` options.
"""
# Ignoring type checks here b/c mypy does not support decorated functions.
data = _urlopen(url=url, auth=auth, **kwargs) # type: ignore
if requests_kwargs is None:
requests_kwargs = {}
data = _urlopen(url, **requests_kwargs) # type: ignore
data.seek(0)
return data

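The public urlopen wrapper now takes one optional dict that is unpacked into the underlying httpx call, instead of a named auth argument plus **kwargs. A usage sketch (URL and credentials are placeholders):

from erddapy.core.url import urlopen

data = urlopen(
    "https://example.com/erddap/tabledap/allDatasets.csvp",
    requests_kwargs={"auth": ("user", "secret"), "timeout": 30},
)
print(data.read(80))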
13 changes: 7 additions & 6 deletions erddapy/erddapy.py
@@ -334,7 +334,7 @@ def get_download_url(
def to_pandas(self, **kw):
"""Save a data request to a pandas.DataFrame.
Accepts any `pandas.read_csv` keyword arguments.
Accepts any `pandas.read_csv` keyword arguments, passed as a dictionary to pandas_kwargs.
This method uses the .csvp [1] response as the default for simplicity,
please check ERDDAP's documentation for the other csv options available.
@@ -344,13 +344,13 @@ def to_pandas(self, **kw):
"""
response = kw.pop("response", "csvp")
url = self.get_download_url(response=response, **kw)
return to_pandas(url, **kw)
return to_pandas(url, pandas_kwargs=dict(**kw))

def to_ncCF(self, protocol: str = None, **kw):
"""Load the data request into a Climate and Forecast compliant netCDF4-python object."""
protocol = protocol if protocol else self.protocol
url = self.get_download_url(response="ncCF", **kw)
return to_ncCF(url, protocol=protocol, **kw)
return to_ncCF(url, protocol=protocol, requests_kwargs=dict(**kw))

def to_xarray(self, **kw):
"""Load the data request into a xarray.Dataset.
@@ -364,7 +364,8 @@ def to_xarray(self, **kw):
else:
response = "ncCF"
url = self.get_download_url(response=response)
return to_xarray(url, response=response, auth=self.auth, **kw)
requests_kwargs = dict(auth=self.auth)
return to_xarray(url, response, requests_kwargs, xarray_kwargs=dict(**kw))

def to_iris(self, **kw):
"""Load the data request into an iris.CubeList.
@@ -373,7 +374,7 @@ def to_iris(self, **kw):
"""
response = "nc" if self.protocol == "griddap" else "ncCF"
url = self.get_download_url(response=response, **kw)
return to_iris(url, **kw)
return to_iris(url, iris_kwargs=dict(**kw))

@functools.lru_cache(maxsize=None)
def _get_variables(self, dataset_id: OptionalStr = None) -> Dict:
@@ -386,7 +387,7 @@ def _get_variables(self, dataset_id: OptionalStr = None) -> Dict:
url = self.get_info_url(dataset_id=dataset_id, response="csv")

variables = {}
data = urlopen(url, auth=self.auth, **self.requests_kwargs)
data = urlopen(url, self.requests_kwargs)
_df = pd.read_csv(data)
self._dataset_id = dataset_id
for variable in set(_df["Variable Name"]):
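Nothing changes for users of the ERDDAP class: extra keyword arguments are still accepted and are repacked into the matching *_kwargs dict before reaching the interface functions. A sketch, assuming a reachable server and an existing dataset id (both placeholders):

from erddapy import ERDDAP

e = ERDDAP(server="https://standards.sensors.ioos.us/erddap", protocol="tabledap")  # example server
e.dataset_id = "org_cormp_cap2"  # hypothetical dataset id
e.variables = ["time", "sea_water_temperature"]

# read_csv options pass through **kw and become pandas_kwargs internally
df = e.to_pandas(index_col="time (UTC)", parse_dates=True)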
5 changes: 3 additions & 2 deletions notebooks/00-quick_intro.ipynb
@@ -67,7 +67,7 @@
"]\n",
"\n",
"e.constraints = {\n",
" \"time>=\": \"now-7days\",\n",
" \"time>=\": \"2000-01-01\",\n",
"}\n",
"\n",
"\n",
@@ -179,6 +179,7 @@
" parse_dates=True,\n",
").dropna()\n",
"\n",
"\n",
"df.head()"
]
}
@@ -196,7 +197,7 @@
},
"gist_id": "3f0f25b13ade0c64c84607bd92903d1b",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.10.4 ('erddapy')",
"language": "python",
"name": "python3"
},
4 changes: 2 additions & 2 deletions notebooks/01a-griddap.ipynb
@@ -129,8 +129,8 @@
"source": [
"def bounds2contraints(bounds):\n",
" return {\n",
" \"longitude>=\": bounds.minx.squeeze(),\n",
" \"longitude<=\": bounds.maxx.squeeze(),\n",
" \"longitude>=\": 360 - abs(bounds.minx.squeeze()), # convert longitude to 360 format\n",
" \"longitude<=\": 360 - abs(bounds.maxx.squeeze()),\n",
" \"latitude>=\": bounds.miny.squeeze(),\n",
" \"latitude<=\": bounds.maxy.squeeze(),\n",
" }\n",
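The notebook edit above moves the bounding-box longitudes from the -180..180 convention to the 0..360 convention expected by that griddap dataset; for western-hemisphere (negative) longitudes, 360 - abs(lon) gives the same result as lon % 360. A quick arithmetic check with illustrative values:

for lon in (-82.5, -60.0):
    lon360 = 360 - abs(lon)   # matches lon % 360 for negative lon
    print(lon, "->", lon360)  # -82.5 -> 277.5, -60.0 -> 300.0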
3 changes: 1 addition & 2 deletions tests/test_netcdf_handling.py
@@ -16,8 +16,7 @@ def test__nc_dataset_in_memory_https():
from netCDF4 import Dataset

url = "http://erddap.ioos.us/erddap/tabledap/allDatasets.nc" # noqa
auth = None
_nc = _nc_dataset(url, auth)
_nc = _nc_dataset(url)
assert isinstance(_nc, Dataset)
assert _nc.filepath() == url.split("/")[-1]

5 changes: 4 additions & 1 deletion tests/test_to_objects.py
@@ -104,7 +104,10 @@ def test_to_pandas(dataset_tabledap):
"""Test converting tabledap to a pandas DataFrame."""
import pandas as pd

df = dataset_tabledap.to_pandas(index_col="time (UTC)", parse_dates=True).dropna()
df = dataset_tabledap.to_pandas(
index_col="time (UTC)",
parse_dates=True,
).dropna()

assert isinstance(df, pd.DataFrame)
assert df.index.name == "time (UTC)"
