@@ -75,6 +75,34 @@ def _get_default_engine_netcdf():
7575 return engine
7676
7777
def _get_engine_from_magic_number(filename_or_obj):
    """Choose a backend engine by sniffing the leading bytes of the input.

    Parameters
    ----------
    filename_or_obj : bytes or file-like
        Either an in-memory file image, or a binary file-like object that
        supports ``tell``, ``read`` and ``seek`` and is positioned at
        offset zero.

    Returns
    -------
    str
        ``'scipy'`` when the data starts with the netCDF3 ``CDF`` signature,
        ``'h5netcdf'`` when it starts with the 8-byte HDF5 signature.

    Raises
    ------
    ValueError
        If a file-like object is not positioned at offset zero, if an HDF5
        image is passed as ``bytes`` (only paths and file-like objects are
        accepted for that format), or if neither signature is recognized.
    """
    is_bytes = isinstance(filename_or_obj, bytes)

    if is_bytes:
        magic_number = filename_or_obj[:8]
    else:
        if filename_or_obj.tell() != 0:
            raise ValueError("file-like object read/write pointer not at zero "
                             "please close and reopen, or use a context "
                             "manager")
        magic_number = filename_or_obj.read(8)
        # Rewind so whichever backend opens the object next sees the whole
        # stream, including the header that was just consumed.
        filename_or_obj.seek(0)

    if magic_number.startswith(b'CDF'):
        return 'scipy'

    # The HDF5 signature is exactly 8 bytes: \x89 H D F \r \n \x1a \n.
    # It must not be longer than the 8 bytes sniffed above, otherwise
    # ``startswith`` can never succeed.
    if magic_number.startswith(b'\x89HDF\r\n\x1a\n'):
        if is_bytes:
            raise ValueError("can't open netCDF4/HDF5 as bytes "
                             "try passing a path or file-like object")
        return 'h5netcdf'

    # Unknown header: keep the error message readable by truncating large
    # byte-string payloads before formatting them.
    if is_bytes and len(filename_or_obj) > 80:
        filename_or_obj = filename_or_obj[:80] + b'...'
    raise ValueError('{} is not a valid netCDF file '
                     'did you mean to pass a string for a path instead?'
                     .format(filename_or_obj))
104+
105+
78106def _get_default_engine (path , allow_remote = False ):
79107 if allow_remote and is_remote_uri (path ):
80108 engine = _get_default_engine_remote_uri ()
@@ -170,8 +198,8 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
170198 Strings and Path objects are interpreted as a path to a netCDF file
171199 or an OpenDAP URL and opened with python-netCDF4, unless the filename
172200 ends with .gz, in which case the file is gunzipped and opened with
173- scipy.io.netcdf (only netCDF3 supported). File-like objects are opened
174- with scipy.io.netcdf (only netCDF3 supported ).
201+ scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
202+ objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
175203 group : str, optional
176204 Path to the netCDF4 group in the given file to open (only works for
177205 netCDF4 files).
@@ -258,6 +286,13 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
258286 --------
259287 open_mfdataset
260288 """
289+ engines = [None , 'netcdf4' , 'scipy' , 'pydap' , 'h5netcdf' , 'pynio' ,
290+ 'cfgrib' , 'pseudonetcdf' ]
291+ if engine not in engines :
292+ raise ValueError ('unrecognized engine for open_dataset: {}\n '
293+ 'must be one of: {}'
294+ .format (engine , engines ))
295+
261296 if autoclose is not None :
262297 warnings .warn (
263298 'The autoclose argument is no longer used by '
@@ -316,18 +351,9 @@ def maybe_decode_store(store, lock=False):
316351
317352 if isinstance (filename_or_obj , backends .AbstractDataStore ):
318353 store = filename_or_obj
319- ds = maybe_decode_store (store )
320- elif isinstance (filename_or_obj , str ):
321354
322- if (isinstance (filename_or_obj , bytes ) and
323- filename_or_obj .startswith (b'\x89 HDF' )):
324- raise ValueError ('cannot read netCDF4/HDF5 file images' )
325- elif (isinstance (filename_or_obj , bytes ) and
326- filename_or_obj .startswith (b'CDF' )):
327- # netCDF3 file images are handled by scipy
328- pass
329- elif isinstance (filename_or_obj , str ):
330- filename_or_obj = _normalize_path (filename_or_obj )
355+ elif isinstance (filename_or_obj , str ):
356+ filename_or_obj = _normalize_path (filename_or_obj )
331357
332358 if engine is None :
333359 engine = _get_default_engine (filename_or_obj ,
@@ -352,18 +378,19 @@ def maybe_decode_store(store, lock=False):
352378 elif engine == 'cfgrib' :
353379 store = backends .CfGribDataStore (
354380 filename_or_obj , lock = lock , ** backend_kwargs )
355- else :
356- raise ValueError ('unrecognized engine for open_dataset: %r'
357- % engine )
358381
359- with close_on_error (store ):
360- ds = maybe_decode_store (store )
361382 else :
362- if engine is not None and engine != 'scipy' :
363- raise ValueError ('can only read file-like objects with '
364- "default engine or engine='scipy'" )
365- # assume filename_or_obj is a file-like object
366- store = backends .ScipyDataStore (filename_or_obj )
383+ if engine not in [None , 'scipy' , 'h5netcdf' ]:
384+ raise ValueError ("can only read bytes or file-like objects "
385+ "with engine='scipy' or 'h5netcdf'" )
386+ engine = _get_engine_from_magic_number (filename_or_obj )
387+ if engine == 'scipy' :
388+ store = backends .ScipyDataStore (filename_or_obj , ** backend_kwargs )
389+ elif engine == 'h5netcdf' :
390+ store = backends .H5NetCDFStore (filename_or_obj , group = group ,
391+ lock = lock , ** backend_kwargs )
392+
393+ with close_on_error (store ):
367394 ds = maybe_decode_store (store )
368395
369396 # Ensure source filename always stored in dataset object (GH issue #2550)
@@ -390,8 +417,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
390417 Strings and Paths are interpreted as a path to a netCDF file or an
391418 OpenDAP URL and opened with python-netCDF4, unless the filename ends
392419 with .gz, in which case the file is gunzipped and opened with
393- scipy.io.netcdf (only netCDF3 supported). File-like objects are opened
394- with scipy.io.netcdf (only netCDF3 supported ).
420+ scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
421+ objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
395422 group : str, optional
396423 Path to the netCDF4 group in the given file to open (only works for
397424 netCDF4 files).
0 commit comments