Skip to content

Better error message for setting encoding["units"]  #5199

Closed
@yt87

Description

@yt87

What happened:

Setting invalid units in the time axis encoding raises an unhelpful exception: AttributeError: 'NoneType' object has no attribute 'groups'

What you expected to happen:

It should report "invalid time units", as it already does for the commented-out line in the example below: ValueError: invalid time units: days after 1/3/2000

Minimal Complete Verifiable Example:

# Put your MCVE code here
import pandas as pd
import xarray as xr
ds = xr.Dataset(data_vars={'v': (('t',), [0,])},
                coords={'t': [pd.Timestamp(2000, 1, 1)]})
ds.t.encoding['units'] = 'days since Big Bang'
#ds.t.encoding['units'] = 'days after 1/3/2000'
ds.to_netcdf('/tmp/x.nc', mode='w')

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-16-0d97a797be64> in <module>
      5 ds.t.encoding['units'] = 'days since Big Bang'
      6 #ds.t.encoding['units'] = 'days after 1/3/2000'
----> 7 ds.to_netcdf('/tmp/x.nc', mode='w')

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/core/dataset.py in to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
   1752         from ..backends.api import to_netcdf
   1753 
-> 1754         return to_netcdf(
   1755             self,
   1756             path,

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/backends/api.py in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
   1066         # TODO: allow this work (setting up the file for writing array data)
   1067         # to be parallelized with dask
-> 1068         dump_to_store(
   1069             dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims
   1070         )

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/backends/api.py in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
   1113         variables, attrs = encoder(variables, attrs)
   1114 
-> 1115     store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
   1116 
   1117 

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/backends/common.py in store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
    261             writer = ArrayWriter()
    262 
--> 263         variables, attributes = self.encode(variables, attributes)
    264 
    265         self.set_attributes(attributes)

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/backends/common.py in encode(self, variables, attributes)
    350         # All NetCDF files get CF encoded by default, without this attempting
    351         # to write times, for example, would fail.
--> 352         variables, attributes = cf_encoder(variables, attributes)
    353         variables = {k: self.encode_variable(v) for k, v in variables.items()}
    354         attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/conventions.py in cf_encoder(variables, attributes)
    841     _update_bounds_encoding(variables)
    842 
--> 843     new_vars = {k: encode_cf_variable(v, name=k) for k, v in variables.items()}
    844 
    845     # Remove attrs from bounds variables (issue #2921)

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/conventions.py in <dictcomp>(.0)
    841     _update_bounds_encoding(variables)
    842 
--> 843     new_vars = {k: encode_cf_variable(v, name=k) for k, v in variables.items()}
    844 
    845     # Remove attrs from bounds variables (issue #2921)

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/conventions.py in encode_cf_variable(var, needs_copy, name)
    267         variables.UnsignedIntegerCoder(),
    268     ]:
--> 269         var = coder.encode(var, name=name)
    270 
    271     # TODO(shoyer): convert all of these to use coders, too:

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/coding/times.py in encode(self, variable, name)
    510             variable
    511         ):
--> 512             (data, units, calendar) = encode_cf_datetime(
    513                 data, encoding.pop("units", None), encoding.pop("calendar", None)
    514             )

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/coding/times.py in encode_cf_datetime(dates, units, calendar)
    448         units = infer_datetime_units(dates)
    449     else:
--> 450         units = _cleanup_netcdf_time_units(units)
    451 
    452     if calendar is None:

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/coding/times.py in _cleanup_netcdf_time_units(units)
    399 
    400 def _cleanup_netcdf_time_units(units):
--> 401     delta, ref_date = _unpack_netcdf_time_units(units)
    402     try:
    403         units = "{} since {}".format(delta, format_timestamp(ref_date))

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/coding/times.py in _unpack_netcdf_time_units(units)
    130 
    131     delta_units, ref_date = [s.strip() for s in matches.groups()]
--> 132     ref_date = _ensure_padded_year(ref_date)
    133 
    134     return delta_units, ref_date

~/miniconda3/envs/xarray/lib/python3.9/site-packages/xarray-0.0.0-py3.9.egg/xarray/coding/times.py in _ensure_padded_year(ref_date)
    105     # appropriately
    106     matches_start_digits = re.match(r"(\d+)(.*)", ref_date)
--> 107     ref_year, everything_else = [s for s in matches_start_digits.groups()]
    108     ref_date_padded = "{:04d}{}".format(int(ref_year), everything_else)
    109 

AttributeError: 'NoneType' object has no attribute 'groups'

Anything else we need to know?:
Is there a detailed specification of the valid units string? Setting it to 1/3/2000 surprised me (my locale: LANG=en_CA.UTF-8).
Environment:
Latest code from github: xarray version 0.0.0

Output of xr.show_versions()

INSTALLED VERSIONS
------------------
commit: None
python: 3.9.2 | packaged by conda-forge | (default, Feb 21 2021, 05:02:46) [GCC 9.3.0]
python-bits: 64
OS: Linux
OS-release: 5.10.11-200.fc33.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_CA.UTF-8
LOCALE: en_CA.UTF-8
libhdf5: 1.10.6
libnetcdf: 4.8.0

xarray: 0.0.0
pandas: 1.2.4
numpy: 1.20.2
scipy: None
netCDF4: 1.5.6
pydap: None
h5netcdf: None
h5py: None
Nio: None
zarr: 2.7.1
cftime: 1.4.1
nc_time_axis: None
PseudoNetCDF: None
rasterio: None
cfgrib: None
iris: None
bottleneck: None
dask: 2021.04.0
distributed: 2021.04.0
matplotlib: None
cartopy: None
seaborn: None
numbagg: None
pint: None
setuptools: 49.6.0.post20210108
pip: 21.0.1
conda: None
pytest: None
IPython: 7.22.0
sphinx: None

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions