Commit

Merge pull request #280 from vinisalazar/fix-interfaces-kwargs
Fix kwargs propagation in interfaces module
ocefpaf authored Mar 10, 2023
2 parents eab6fa7 + 8322e67 commit c97cb91
Showing 8 changed files with 88 additions and 39 deletions.
82 changes: 61 additions & 21 deletions erddapy/core/interfaces.py
@@ -4,7 +4,7 @@
This module takes an URL or the bytes response of a request and converts it to Pandas,
XArray, Iris, etc. objects.
"""
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict, Optional

import pandas as pd

@@ -16,45 +16,85 @@
from netCDF4 import Dataset


def to_pandas(url: str, requests_kwargs=None, **kw) -> "pd.DataFrame":
"""Convert a URL to Pandas DataFrame."""
if requests_kwargs is None:
requests_kwargs = {}
data = urlopen(url, **requests_kwargs)
def to_pandas(
url: str,
requests_kwargs: Optional[Dict] = None,
pandas_kwargs: Optional[Dict] = None,
) -> "pd.DataFrame":
"""
Convert a URL to Pandas DataFrame.
url: URL to request data from.
requests_kwargs: arguments to be passed to urlopen method.
**pandas_kwargs: kwargs to be passed to third-party library (pandas).
"""
data = urlopen(url, **(requests_kwargs or {}))
try:
return pd.read_csv(data, **kw)
return pd.read_csv(data, **(pandas_kwargs or {}))
except Exception as e:
raise ValueError(f"Could not read url {url} with Pandas.read_csv.") from e


def to_ncCF(url: str, protocol: str = None, **kw) -> "Dataset":
"""Convert a URL to a netCDF4 Dataset."""
def to_ncCF(
url: str,
protocol: str = None,
requests_kwargs: Optional[Dict] = None,
) -> "Dataset":
"""
Convert a URL to a netCDF4 Dataset.
url: URL to request data from.
protocol: 'griddap' or 'tabledap'.
requests_kwargs: arguments to be passed to urlopen method (including auth).
"""
if protocol == "griddap":
raise ValueError(
f"Cannot use .ncCF with griddap protocol. The URL you tried to access is: '{url}'.",
)
auth = kw.pop("auth", None)
return _nc_dataset(url, auth=auth, **kw)
return _nc_dataset(url, requests_kwargs)


def to_xarray(url: str, response="opendap", **kw) -> "xr.Dataset":
"""Convert a URL to an xarray dataset."""
def to_xarray(
url: str,
response="opendap",
requests_kwargs: Optional[Dict] = None,
xarray_kwargs: Optional[Dict] = None,
) -> "xr.Dataset":
"""
Convert a URL to an xarray dataset.
url: URL to request data from.
response: type of response to be requested from the server.
requests_kwargs: arguments to be passed to urlopen method.
xarray_kwargs: kwargs to be passed to third-party library (xarray).
"""
import xarray as xr

auth = kw.pop("auth", None)
if response == "opendap":
return xr.open_dataset(url, **kw)
return xr.open_dataset(url, **(xarray_kwargs or {}))
else:
nc = _nc_dataset(url, auth=auth, **kw)
return xr.open_dataset(xr.backends.NetCDF4DataStore(nc), **kw)
nc = _nc_dataset(url, requests_kwargs)
return xr.open_dataset(
xr.backends.NetCDF4DataStore(nc), **(xarray_kwargs or {})
)


def to_iris(url: str, **kw):
"""Convert a URL to an iris CubeList."""
def to_iris(
url: str,
requests_kwargs: Optional[Dict] = None,
iris_kwargs: Optional[Dict] = None,
):
"""
Convert a URL to an iris CubeList.
url: URL to request data from.
requests_kwargs: arguments to be passed to urlopen method.
iris_kwargs: kwargs to be passed to third-party library (iris).
"""
import iris

data = urlopen(url, **kw)
data = urlopen(url, **(requests_kwargs or {}))
with _tempnc(data) as tmp:
cubes = iris.load_raw(tmp, **kw)
cubes = iris.load_raw(tmp, **(iris_kwargs or {}))
_ = [cube.data for cube in cubes]
return cubes
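The practical effect of this file's changes: request options (auth, timeout, etc.) and reader options no longer share a single **kw bag but travel in separate dictionaries. A minimal sketch of the new calling convention, with a placeholder URL and illustrative options:

from erddapy.core.interfaces import to_pandas

# hypothetical tabledap .csvp URL; any reachable ERDDAP endpoint would do
url = "https://example.com/erddap/tabledap/someDataset.csvp"

df = to_pandas(
    url,
    requests_kwargs={"timeout": 60},       # forwarded to urlopen/httpx
    pandas_kwargs={"parse_dates": True},   # forwarded to pandas.read_csv
)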
6 changes: 3 additions & 3 deletions erddapy/core/netcdf.py
@@ -2,17 +2,17 @@

from contextlib import contextmanager
from pathlib import Path
from typing import BinaryIO, Dict, Generator
from typing import BinaryIO, Dict, Generator, Optional
from urllib.parse import urlparse

from erddapy.core.url import urlopen


def _nc_dataset(url, auth, **requests_kwargs: Dict):
def _nc_dataset(url, requests_kwargs: Optional[Dict] = None):
"""Return a netCDF4-python Dataset from memory and fallbacks to disk if that fails."""
from netCDF4 import Dataset

data = urlopen(url=url, auth=auth, **requests_kwargs)
data = urlopen(url, requests_kwargs)
try:
return Dataset(Path(urlparse(url).path).name, memory=data.read())
except OSError:
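The private helper follows the same convention: auth is no longer its own parameter but just another key in requests_kwargs. A sketch with a placeholder URL and credentials:

from erddapy.core.netcdf import _nc_dataset

nc = _nc_dataset(
    "https://example.com/erddap/tabledap/allDatasets.nc",  # placeholder URL
    requests_kwargs={"auth": ("user", "secret")},
)
print(nc.variables.keys())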
9 changes: 7 additions & 2 deletions erddapy/core/url.py
@@ -25,14 +25,19 @@ def _urlopen(url: str, auth: Optional[tuple] = None, **kwargs: Dict) -> BinaryIO
return io.BytesIO(response.content)


def urlopen(url: str, auth: Optional[tuple] = None, **kwargs: Dict) -> BinaryIO:
def urlopen(
url: str,
requests_kwargs: Optional[Dict] = None,
) -> BinaryIO:
"""Thin wrapper around httpx get content.
See httpx.get docs for the `params` and `kwargs` options.
"""
# Ignoring type checks here b/c mypy does not support decorated functions.
data = _urlopen(url=url, auth=auth, **kwargs) # type: ignore
if requests_kwargs is None:
requests_kwargs = {}
data = _urlopen(url, **requests_kwargs) # type: ignore
data.seek(0)
return data

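The public urlopen wrapper now takes one optional dict that is unpacked into the underlying httpx call, instead of a named auth argument plus **kwargs. A usage sketch (URL and credentials are placeholders):

from erddapy.core.url import urlopen

data = urlopen(
    "https://example.com/erddap/tabledap/allDatasets.csvp",
    requests_kwargs={"auth": ("user", "secret"), "timeout": 30},
)
print(data.read(80))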
13 changes: 7 additions & 6 deletions erddapy/erddapy.py
@@ -334,7 +334,7 @@ def get_download_url(
def to_pandas(self, **kw):
"""Save a data request to a pandas.DataFrame.
Accepts any `pandas.read_csv` keyword arguments.
Accepts any `pandas.read_csv` keyword arguments, passed as a dictionary to pandas_kwargs.
This method uses the .csvp [1] response as the default for simplicity,
please check ERDDAP's documentation for the other csv options available.
@@ -344,13 +344,13 @@ def to_pandas(self, **kw):
"""
response = kw.pop("response", "csvp")
url = self.get_download_url(response=response, **kw)
return to_pandas(url, **kw)
return to_pandas(url, pandas_kwargs=dict(**kw))

def to_ncCF(self, protocol: str = None, **kw):
"""Load the data request into a Climate and Forecast compliant netCDF4-python object."""
protocol = protocol if protocol else self.protocol
url = self.get_download_url(response="ncCF", **kw)
return to_ncCF(url, protocol=protocol, **kw)
return to_ncCF(url, protocol=protocol, requests_kwargs=dict(**kw))

def to_xarray(self, **kw):
"""Load the data request into a xarray.Dataset.
@@ -364,7 +364,8 @@ def to_xarray(self, **kw):
else:
response = "ncCF"
url = self.get_download_url(response=response)
return to_xarray(url, response=response, auth=self.auth, **kw)
requests_kwargs = dict(auth=self.auth)
return to_xarray(url, response, requests_kwargs, xarray_kwargs=dict(**kw))

def to_iris(self, **kw):
"""Load the data request into an iris.CubeList.
@@ -373,7 +374,7 @@ def to_iris(self, **kw):
"""
response = "nc" if self.protocol == "griddap" else "ncCF"
url = self.get_download_url(response=response, **kw)
return to_iris(url, **kw)
return to_iris(url, iris_kwargs=dict(**kw))

@functools.lru_cache(maxsize=None)
def _get_variables(self, dataset_id: OptionalStr = None) -> Dict:
@@ -386,7 +387,7 @@ def _get_variables(self, dataset_id: OptionalStr = None) -> Dict:
url = self.get_info_url(dataset_id=dataset_id, response="csv")

variables = {}
data = urlopen(url, auth=self.auth, **self.requests_kwargs)
data = urlopen(url, self.requests_kwargs)
_df = pd.read_csv(data)
self._dataset_id = dataset_id
for variable in set(_df["Variable Name"]):
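Nothing changes for users of the ERDDAP class: extra keyword arguments are still accepted and are repacked into the matching *_kwargs dict before reaching the interface functions. A sketch, assuming a reachable server and an existing dataset id (both placeholders):

from erddapy import ERDDAP

e = ERDDAP(server="https://standards.sensors.ioos.us/erddap", protocol="tabledap")  # example server
e.dataset_id = "org_cormp_cap2"  # hypothetical dataset id
e.variables = ["time", "sea_water_temperature"]

# read_csv options pass through **kw and become pandas_kwargs internally
df = e.to_pandas(index_col="time (UTC)", parse_dates=True)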
5 changes: 3 additions & 2 deletions notebooks/00-quick_intro.ipynb
@@ -67,7 +67,7 @@
"]\n",
"\n",
"e.constraints = {\n",
" \"time>=\": \"now-7days\",\n",
" \"time>=\": \"2000-01-01\",\n",
"}\n",
"\n",
"\n",
@@ -179,6 +179,7 @@
" parse_dates=True,\n",
").dropna()\n",
"\n",
"\n",
"df.head()"
]
}
@@ -196,7 +197,7 @@
},
"gist_id": "3f0f25b13ade0c64c84607bd92903d1b",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.10.4 ('erddapy')",
"language": "python",
"name": "python3"
},
4 changes: 2 additions & 2 deletions notebooks/01a-griddap.ipynb
@@ -129,8 +129,8 @@
"source": [
"def bounds2contraints(bounds):\n",
" return {\n",
" \"longitude>=\": bounds.minx.squeeze(),\n",
" \"longitude<=\": bounds.maxx.squeeze(),\n",
" \"longitude>=\": 360 - abs(bounds.minx.squeeze()), # convert longitude to 360 format\n",
" \"longitude<=\": 360 - abs(bounds.maxx.squeeze()),\n",
" \"latitude>=\": bounds.miny.squeeze(),\n",
" \"latitude<=\": bounds.maxy.squeeze(),\n",
" }\n",
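The notebook edit above moves the bounding-box longitudes from the -180..180 convention to the 0..360 convention expected by that griddap dataset; for western-hemisphere (negative) longitudes, 360 - abs(lon) gives the same result as lon % 360. A quick arithmetic check with illustrative values:

for lon in (-82.5, -60.0):
    lon360 = 360 - abs(lon)   # matches lon % 360 for negative lon
    print(lon, "->", lon360)  # -82.5 -> 277.5, -60.0 -> 300.0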
3 changes: 1 addition & 2 deletions tests/test_netcdf_handling.py
@@ -16,8 +16,7 @@ def test__nc_dataset_in_memory_https():
from netCDF4 import Dataset

url = "http://erddap.ioos.us/erddap/tabledap/allDatasets.nc" # noqa
auth = None
_nc = _nc_dataset(url, auth)
_nc = _nc_dataset(url)
assert isinstance(_nc, Dataset)
assert _nc.filepath() == url.split("/")[-1]

5 changes: 4 additions & 1 deletion tests/test_to_objects.py
@@ -104,7 +104,10 @@ def test_to_pandas(dataset_tabledap):
"""Test converting tabledap to a pandas DataFrame."""
import pandas as pd

df = dataset_tabledap.to_pandas(index_col="time (UTC)", parse_dates=True).dropna()
df = dataset_tabledap.to_pandas(
index_col="time (UTC)",
parse_dates=True,
).dropna()

assert isinstance(df, pd.DataFrame)
assert df.index.name == "time (UTC)"
