I am currently trying to read netCDF files directly from a THREDDS server or from Azure blob storage by passing a URL to xarray.open_dataset. I have tested netCDF files from several sources, and the files themselves appear to be fine: xarray.open_dataset works if I first download them to my local environment, either with wget.download or by copying the files manually.
MCVE Code Sample
import xarray as xr
url = 'https://ooiopendata.blob.core.windows.net/botpt/NOAA_NCDC_ERSST_v3b_SST.nc'
ds = xr.open_dataset(url)
I have also tested it with these additional urls:
url = 'https://opendap.oceanobservatories.org/async_results/dax.soule@qc.cuny.edu/20191227T065318736Z-RS03ECAL-MJ03E-06-BOTPTA302-streamed-botpt_nano_sample/deployment0001_RS03ECAL-MJ03E-06-BOTPTA302-streamed-botpt_nano_sample_20141005T000000-20141005T235959.nc'
url = 'https://opendap.oceanobservatories.org/thredds/fileServer/ooi/dax.soule@qc.cuny.edu/20191228T052214778Z-RS03ECAL-MJ03E-06-BOTPTA302-streamed-botpt_nano_sample/deployment0001_RS03ECAL-MJ03E-06-BOTPTA302-streamed-botpt_nano_sample_20191005T000000-20191005T235959.950000.nc'
Expected Output - In all cases, the expected output is the xarray dataset:
<xarray.Dataset>
Dimensions: (time: 1727934)
Coordinates:
obs (time) int32 0 1 2 3 4 ... 1727930 1727931 1727932 1727933
* time (time) datetime64[ns] 2019-10-05 ... 2019-10-05T23:59:59.949999616
Data variables:
bottom_pressure (time) float32 ...
press_trans_temp (time) float64 ...
sensor_id (time) object ...
Attributes:
Problem Description
As far as I can tell, neither permissions nor the files themselves are the issue, because the URLs download the files correctly and xarray opens the downloaded copies. The only thing I am not able to do is open the files with xarray directly from the server. Any help or suggestions are greatly appreciated.
Output of xr.open_dataset(url)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/backends/file_manager.py in _acquire_with_cache_info(self, needs_lock)
197 try:
--> 198 file = self._cache[self._key]
199 except KeyError:
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/backends/lru_cache.py in __getitem__(self, key)
52 with self._lock:
---> 53 value = self._cache[key]
54 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('https://ooiopendata.blob.core.windows.net/botpt/NOAA_NCDC_ERSST_v3b_SST.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-25-2eaba4f9d881> in <module>
----> 1 ds1 = xr.open_dataset('https://ooiopendata.blob.core.windows.net/botpt/NOAA_NCDC_ERSST_v3b_SST.nc')
2 ds1
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/backends/api.py in open_dataset(filename_or_obj, group, decode_cf, mask_and_scale, decode_times, autoclose, concat_characters, decode_coords, engine, chunks, lock, cache, drop_variables, backend_kwargs, use_cftime)
497 if engine == "netcdf4":
498 store = backends.NetCDF4DataStore.open(
--> 499 filename_or_obj, group=group, lock=lock, **backend_kwargs
500 )
501 elif engine == "scipy":
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
355 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
356 )
--> 357 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
358
359 def _acquire(self, needs_lock=True):
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in __init__(self, manager, group, mode, lock, autoclose)
311 self._group = group
312 self._mode = mode
--> 313 self.format = self.ds.data_model
314 self._filename = self.ds.filepath()
315 self.is_remote = is_remote_uri(self._filename)
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in ds(self)
364 @property
365 def ds(self):
--> 366 return self._acquire()
367
368 def open_store_variable(self, name, var):
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in _acquire(self, needs_lock)
358
359 def _acquire(self, needs_lock=True):
--> 360 with self._manager.acquire_context(needs_lock) as root:
361 ds = _nc4_require_group(root, self._group, self._mode)
362 return ds
/srv/conda/envs/notebook/lib/python3.7/contextlib.py in __enter__(self)
110 del self.args, self.kwds, self.func
111 try:
--> 112 return next(self.gen)
113 except StopIteration:
114 raise RuntimeError("generator didn't yield") from None
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/backends/file_manager.py in acquire_context(self, needs_lock)
184 def acquire_context(self, needs_lock=True):
185 """Context manager for acquiring a file."""
--> 186 file, cached = self._acquire_with_cache_info(needs_lock)
187 try:
188 yield file
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/backends/file_manager.py in _acquire_with_cache_info(self, needs_lock)
202 kwargs = kwargs.copy()
203 kwargs["mode"] = self._mode
--> 204 file = self._opener(*self._args, **kwargs)
205 if self._mode == "w":
206 # ensure file doesn't get overriden when opened again
netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Dataset.__init__()
netCDF4/_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()
OSError: [Errno -90] NetCDF: file not found: b'https://ooiopendata.blob.core.windows.net/botpt/NOAA_NCDC_ERSST_v3b_SST.nc'
cc @tjcrone
I am currently trying to read netCDF files directly from a THREDDS server or from Azure blob storage by passing a URL to xarray.open_dataset. I have tested netCDF files from several sources, and the files themselves appear to be fine: xarray.open_dataset works if I first download them to my local environment, either with wget.download or by copying the files manually.
MCVE Code Sample
I have also tested it with these additional urls:
Expected Output - In all cases, the expected output is the xarray dataset:
Problem Description
As far as I can tell, neither permissions nor the files themselves are the issue, because the URLs download the files correctly and xarray opens the downloaded copies. The only thing I am not able to do is open the files with xarray directly from the server. Any help or suggestions are greatly appreciated.
Output of xr.open_dataset(url)
cc @tjcrone