-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use magic bytes to identify file formats #143
Changes from all commits
7f55a19
d8445b0
49f089c
056a583
68f640c
a9961b6
ca1851e
f8bbc6b
928efd9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -308,6 +308,31 @@ def test_anon_read_s3(self, filetype, indexes): | |
assert isinstance(vds[var].data, ManifestArray), var | ||
|
||
|
||
@network
class TestReadFromURL:
    """Open example files of several formats directly from their URLs."""

    def test_read_from_url(self):
        """Check every example URL against its expected outcome.

        Formats listed in ``expected_unsupported`` must raise
        ``NotImplementedError``; every other format must open as an
        ``xr.Dataset``.
        """
        sample_urls = {
            "grib": "https://github.com/pydata/xarray-data/raw/master/era5-2mt-2019-03-uk.grib",
            "netcdf3": "https://github.com/pydata/xarray-data/raw/master/air_temperature.nc",
            "netcdf4": "https://github.com/pydata/xarray-data/raw/master/ROMS_example.nc",
            "hdf4": "https://github.com/corteva/rioxarray/raw/master/test/test_data/input/MOD09GA.A2008296.h14v17.006.2015181011753.hdf",
            # https://github.com/zarr-developers/VirtualiZarr/issues/159
            # "hdf5": "https://github.com/fsspec/kerchunk/raw/main/kerchunk/tests/NEONDSTowerTemperatureData.hdf5",
            # https://github.com/zarr-developers/VirtualiZarr/issues/160
            # "tiff": "https://github.com/fsspec/kerchunk/raw/main/kerchunk/tests/lcmap_tiny_cog_2020.tif",
            # "fits": "https://fits.gsfc.nasa.gov/samples/WFPC2u5780205r_c0fx.fits",
            "jpg": "https://github.com/rasterio/rasterio/raw/main/tests/data/389225main_sw_1965_1024.jpg",
        }
        # Formats for which opening is expected to raise NotImplementedError.
        expected_unsupported = ["grib", "jpg", "hdf4"]

        for kind, link in sample_urls.items():
            if kind not in expected_unsupported:
                # Supported format: the result must be an xarray Dataset.
                vds = open_virtual_dataset(link, reader_options={})
                assert isinstance(vds, xr.Dataset)
                continue

            with pytest.raises(NotImplementedError):
                vds = open_virtual_dataset(link, reader_options={})
|
||
|
||
class TestLoadVirtualDataset: | ||
def test_loadable_variables(self, netcdf4_file): | ||
vars_to_load = ["air", "time"] | ||
|
@@ -325,6 +350,13 @@ def test_loadable_variables(self, netcdf4_file): | |
if name in vars_to_load: | ||
xrt.assert_identical(vds.variables[name], full_ds.variables[name]) | ||
|
||
def test_explicit_filetype(self, netcdf4_file): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wasn't sure of the best way to check that these VDS are identical: [code snippet not captured]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you open with … [inline code not captured] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Doesn't seem to work: [code snippet not captured]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, okay - I've raised #161 to track that. |
||
with pytest.raises(ValueError): | ||
open_virtual_dataset(netcdf4_file, filetype="unknown") | ||
|
||
with pytest.raises(NotImplementedError): | ||
open_virtual_dataset(netcdf4_file, filetype="grib") | ||
|
||
@patch("virtualizarr.kerchunk.read_kerchunk_references_from_file") | ||
def test_open_virtual_dataset_passes_expected_args( | ||
self, mock_read_kerchunk, netcdf4_file | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Keeping netcdf4 as an alias for hdf5 because I've seen code in reported issues using filetype=netcdf4.