Commit

Refactor the options to ignore errors. This is now good enough to close
alexamici committed Nov 3, 2018
1 parent 52945f4 commit d3d4896
Showing 8 changed files with 24 additions and 27 deletions.
CHANGELOG.rst (2 changes: 2 additions & 0 deletions)

@@ -8,6 +8,8 @@ Changelog for cfgrib
 - Saves one index file per set of ``index_keys`` in a much more robust way.
 - Refactor CF-encoding and add the new ``encode_cf`` option to ``backend_kwargs``.
   See: `#23 <https://github.com/ecmwf/cfgrib/issues/23>`_.
+- Refactor error handling and the option to ignore errors (not well documented yet).
+  See: `#13 <https://github.com/ecmwf/cfgrib/issues/13>`_.


 0.9.3.1 (2018-10-28)

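Both entries surface as ``backend_kwargs`` options of the xarray entry points. A minimal sketch, assuming a local GRIB file named ``example.grib`` (hypothetical name):

    from cfgrib import xarray_store

    # Hypothetical file name; any GRIB file readable by ecCodes would do.
    ds = xarray_store.open_dataset(
        'example.grib',
        backend_kwargs={
            'encode_cf': ('parameter', 'time'),  # see issue #23
            'errors': 'ignore',                  # see issue #13
        },
    )
    print(ds)
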
README.rst (5 changes: 3 additions & 2 deletions)

@@ -248,13 +248,14 @@ Attributes:

 *cfgrib* also provides an **experimental function** that automates the selection of
 appropriate ``filter_by_keys`` and returns a list of all valid ``xarray.Dataset``'s
-in the GRIB file. The ``open_datasets`` function is intended for interactive exploration of a file
+in the GRIB file (add ``backend_kwargs={'errors': 'ignore'}`` for extra robustness).
+The ``open_datasets`` function is intended for interactive exploration of a file
 and it is not part of the stable API. In the future it may change or be removed altogether.

 .. code-block:: python

     >>> from cfgrib import xarray_store
-    >>> xarray_store.open_datasets('nam.t00z.awp21100.tm00.grib2')
+    >>> xarray_store.open_datasets('nam.t00z.awp21100.tm00.grib2', backend_kwargs={'errors': 'ignore'})
     [<xarray.Dataset>
     Dimensions:    (isobaricInhPa: 19, x: 93, y: 65)
     Coordinates:

cfgrib/cfgrib_.py (7 changes: 1 addition & 6 deletions)

@@ -70,12 +70,7 @@ def __init__(self, filename, lock=None, **backend_kwargs):
         if lock is None:
             lock = ECCODES_LOCK
         self.lock = ensure_lock(lock)
-
-        # NOTE: filter_by_keys is a dict, but CachingFileManager only accepts hashable types
-        if 'filter_by_keys' in backend_kwargs:
-            backend_kwargs['filter_by_keys'] = tuple(backend_kwargs['filter_by_keys'].items())
-
-        self.ds = cfgrib.open_file(filename, mode='r', **backend_kwargs)
+        self.ds = cfgrib.open_file(filename, **backend_kwargs)

     def open_store_variable(self, name, var):
         if isinstance(var.data, np.ndarray):

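With the tuple round-trip removed, ``filter_by_keys`` reaches ``cfgrib.open_file`` as a plain dict. A minimal sketch of a direct call, assuming a local GRIB file named ``example.grib`` (hypothetical name):

    import cfgrib

    # Hypothetical file name; filter_by_keys is passed through as a dict.
    ds = cfgrib.open_file(
        'example.grib',
        filter_by_keys={'typeOfLevel': 'isobaricInhPa'},
    )
    print(ds.dimensions)
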
cfgrib/dataset.py (18 changes: 8 additions & 10 deletions)

@@ -368,7 +368,7 @@ def dict_merge(master, update):


 def build_dataset_components(
-    stream, indexpath='{path}.{short_hash}.idx', filter_by_keys={},
+    stream, indexpath='{path}.{short_hash}.idx', filter_by_keys={}, errors='ignore',
     encode_cf=('parameter', 'time', 'geography', 'vertical'), log=LOG,
 ):
     filter_by_keys = dict(filter_by_keys)
@@ -389,7 +389,10 @@
             dict_merge(dimensions, dims)
             dict_merge(variables, vars)
         except ValueError:
-            log.exception("skipping variable with paramId==%r shortName=%r", param_id, short_name)
+            if errors == 'ignore':
+                log.exception("skipping variable: paramId==%r shortName=%r", param_id, short_name)
+            else:
+                raise
     attributes = enforce_unique_attributes(index, GLOBAL_ATTRIBUTES_KEYS, filter_by_keys)
     cfgrib_ver = pkg_resources.get_distribution("cfgrib").version
     eccodes_ver = eccodes.codes_get_api_version()
@@ -414,13 +417,8 @@ class Dataset(object):
     attributes = attr.attrib(type=T.Dict[str, T.Any])
     encoding = attr.attrib(type=T.Dict[str, T.Any])

-    @classmethod
-    def from_path(cls, path, mode='r', errors='ignore', **kwargs):
-        """Open a GRIB file as a ``Dataset``."""
-        stream = messages.FileStream(path, message_class=cfmessage.CfMessage, errors=errors)
-        return cls(*build_dataset_components(stream, **kwargs))
-

-def open_file(path, **kwargs):
+def open_file(path, grib_errors='ignore', **kwargs):
     """Open a GRIB file as a ``cfgrib.Dataset``."""
-    return Dataset.from_path(path, **kwargs)
+    stream = messages.FileStream(path, message_class=cfmessage.CfMessage, errors=grib_errors)
+    return Dataset(*build_dataset_components(stream, **kwargs))

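The refactor separates two error-handling knobs: ``grib_errors`` controls how the low-level ``FileStream`` reacts to GRIB messages that ecCodes cannot decode, while ``errors`` controls whether ``build_dataset_components`` skips a variable that fails to merge (``'ignore'``) or re-raises the ``ValueError`` (anything else). A minimal sketch of the combinations, assuming a local ``example.grib`` (hypothetical name):

    import cfgrib

    # Library defaults: skip undecodable messages and log-and-skip
    # variables that do not fit the dataset (both options 'ignore').
    ds = cfgrib.open_file('example.grib')

    # Strict on both levels: decoding failures raise an ecCodes error,
    # merge failures raise a ValueError instead of being skipped.
    ds = cfgrib.open_file('example.grib', grib_errors='strict', errors='strict')
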
cfgrib/xarray_store.py (3 changes: 2 additions & 1 deletion)

@@ -40,7 +40,8 @@ def open_dataset(path, backend_kwargs={}, filter_by_keys={}, **kwargs):
         warnings.warn("passing filter_by_keys is deprecated, use backend_kwargs", FutureWarning)
     real_backend_kwargs = {
         'filter_by_keys': filter_by_keys,
-        'errors': 'ignore',
+        'errors': 'strict',
+        'grib_errors': 'ignore',
     }
     real_backend_kwargs.update(backend_kwargs)
     store = cfgrib_.CfGribDataStore(path, **real_backend_kwargs)

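Note the new defaults for the xarray entry point: undecodable GRIB messages are still skipped (``grib_errors='ignore'``), but variables that cannot be merged into a single hypercube now raise (``errors='strict'``). Both can be overridden per call; a sketch, again with a hypothetical ``example.grib``:

    from cfgrib import xarray_store

    # Defaults after this commit: grib_errors='ignore', errors='strict'.
    ds = xarray_store.open_dataset('example.grib')

    # Opt back into the old permissive behaviour for merge failures:
    ds = xarray_store.open_dataset(
        'example.grib', backend_kwargs={'errors': 'ignore'},
    )
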
tests/test_30_dataset.py (12 changes: 6 additions & 6 deletions)

@@ -52,7 +52,7 @@ def test_build_data_var_components_encode_cf_geography():


 def test_Dataset():
-    res = dataset.Dataset.from_path(TEST_DATA)
+    res = dataset.open_file(TEST_DATA)
     assert 'history' in res.attributes
     assert res.attributes['GRIB_edition'] == 1
     assert tuple(res.dimensions.keys()) == \
@@ -61,7 +61,7 @@ def test_Dataset():


 def test_Dataset_no_encode():
-    res = dataset.Dataset.from_path(
+    res = dataset.open_file(
         TEST_DATA, encode_cf=()
     )
     assert 'history' in res.attributes
@@ -71,7 +71,7 @@ def test_Dataset_no_encode():


 def test_Dataset_encode_cf_time():
-    res = dataset.Dataset.from_path(TEST_DATA, encode_cf=('time',))
+    res = dataset.open_file(TEST_DATA, encode_cf=('time',))
     assert 'history' in res.attributes
     assert res.attributes['GRIB_edition'] == 1
     assert tuple(res.dimensions.keys()) == ('number', 'time', 'level', 'i')
@@ -82,7 +82,7 @@ def test_Dataset_encode_cf_time():


 def test_Dataset_encode_cf_geography():
-    res = dataset.Dataset.from_path(TEST_DATA, encode_cf=('geography',))
+    res = dataset.open_file(TEST_DATA, encode_cf=('geography',))
     assert 'history' in res.attributes
     assert res.attributes['GRIB_edition'] == 1
     assert tuple(res.dimensions.keys()) == \
@@ -94,7 +94,7 @@ def test_Dataset_encode_cf_geography():


 def test_Dataset_encode_cf_vertical():
-    res = dataset.Dataset.from_path(TEST_DATA, encode_cf=('vertical',))
+    res = dataset.open_file(TEST_DATA, encode_cf=('vertical',))
     assert 'history' in res.attributes
     assert res.attributes['GRIB_edition'] == 1
     assert tuple(res.dimensions.keys()) == ('number', 'dataDate', 'dataTime', 'isobaricInhPa', 'i')
@@ -106,7 +106,7 @@ def test_Dataset_encode_cf_vertical():

 def test_Dataset_reguler_gg_surface():
     path = os.path.join(SAMPLE_DATA_FOLDER, 'regular_gg_sfc.grib')
-    res = dataset.Dataset.from_path(path)
+    res = dataset.open_file(path)

     assert res.dimensions == {'latitude': 96, 'longitude': 192}
     assert np.allclose(res.variables['latitude'].data[:2], [88.57216851, 86.72253095])

tests/test_40_xarray_store.py (2 changes: 1 addition & 1 deletion)

@@ -54,7 +54,7 @@ def test_open_dataset_corrupted():
     assert len(res.data_vars) == 1

     with pytest.raises(eccodes.EcCodesError):
-        xarray_store.open_dataset(TEST_CORRUPTED, backend_kwargs={'errors': 'strict'})
+        xarray_store.open_dataset(TEST_CORRUPTED, backend_kwargs={'grib_errors': 'strict'})


 def test_open_dataset_encode_cf_time():

tests/test_50_sample_data.py (2 changes: 1 addition & 1 deletion)

@@ -34,7 +34,7 @@ def test_open_dataset(grib_name):
     'hpa_and_pa',
     't_on_different_level_types',
     'tp_on_different_grid_resolutions',
-    pytest.param('uv_on_different_levels', marks=pytest.mark.xfail),
+    'uv_on_different_levels',
 ])
 def test_open_dataset_fail(grib_name):
     grib_path = os.path.join(SAMPLE_DATA_FOLDER, grib_name + '.grib')

