Skip to content

"count" reduction of cftime objects broken by new default #137

Closed
@aulemahal

Description

@aulemahal

It seems that PR #132, by changing the default engine from flox to numpy reactivated the bug of #101 : `func='count`` doesn't work on arrays of dtype 'O' with engine 'numpy'. Which means cftime arrays in most cases.

As we can't change the default engine that xarray uses (I think?), this means that this precise case is broken in xarray too.

Could we simply remove count from the line here?

requires_numeric = func not in ["count", "any", "all"]

Sorry, I should add an example!

import xarray as xr
from flox.xarray import xarray_reduce

t = xr.DataArray(xr.cftime_range('1900-01-01', periods=10, freq='D'), dims=('time',))
xarray_reduce(t, t.notnull().astype(int).rename('dummy'), func='count', engine='numpy')

Raises:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [20], in <cell line: 1>()
----> 1 xarray_reduce(t, t.notnull().astype(int).rename('dummy'), func='count', engine='numpy')

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/xarray.py:335, in xarray_reduce(obj, func, expected_groups, isbin, sort, dim, split_out, fill_value, method, engine, keep_attrs, skipna, min_count, reindex, *by, **finalize_kwargs)
    332 input_core_dims = _get_input_core_dims(group_names, dim, ds, grouper_dims)
    333 input_core_dims += [input_core_dims[-1]] * (len(by) - 1)
--> 335 actual = xr.apply_ufunc(
    336     wrapper,
    337     ds.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims),
    338     *by,
    339     input_core_dims=input_core_dims,
    340     # for xarray's test_groupby_duplicate_coordinate_labels
    341     exclude_dims=set(dim),
    342     output_core_dims=[group_names],
    343     dask="allowed",
    344     dask_gufunc_kwargs=dict(output_sizes=group_sizes),
    345     keep_attrs=keep_attrs,
    346     kwargs={
    347         "func": func,
    348         "axis": axis,
    349         "sort": sort,
    350         "split_out": split_out,
    351         "fill_value": fill_value,
    352         "method": method,
    353         "min_count": min_count,
    354         "skipna": skipna,
    355         "engine": engine,
    356         "reindex": reindex,
    357         "expected_groups": tuple(expected_groups),
    358         "isbin": isbin,
    359         "finalize_kwargs": finalize_kwargs,
    360     },
    361 )
    363 # restore non-dim coord variables without the core dimension
    364 # TODO: shouldn't apply_ufunc handle this?
    365 for var in set(ds.variables) - set(ds.dims):

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/xarray/core/computation.py:1147, in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, *args)
   1145 # feed datasets apply_variable_ufunc through apply_dataset_vfunc
   1146 elif any(is_dict_like(a) for a in args):
-> 1147     return apply_dataset_vfunc(
   1148         variables_vfunc,
   1149         *args,
   1150         signature=signature,
   1151         join=join,
   1152         exclude_dims=exclude_dims,
   1153         dataset_join=dataset_join,
   1154         fill_value=dataset_fill_value,
   1155         keep_attrs=keep_attrs,
   1156     )
   1157 # feed DataArray apply_variable_ufunc through apply_dataarray_vfunc
   1158 elif any(isinstance(a, DataArray) for a in args):

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/xarray/core/computation.py:441, in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, *args)
    436 list_of_coords = build_output_coords(
    437     args, signature, exclude_dims, combine_attrs=keep_attrs
    438 )
    439 args = [getattr(arg, "data_vars", arg) for arg in args]
--> 441 result_vars = apply_dict_of_variables_vfunc(
    442     func, *args, signature=signature, join=dataset_join, fill_value=fill_value
    443 )
    445 if signature.num_outputs > 1:
    446     out = tuple(_fast_dataset(*args) for args in zip(result_vars, list_of_coords))

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/xarray/core/computation.py:385, in apply_dict_of_variables_vfunc(func, signature, join, fill_value, *args)
    383 result_vars = {}
    384 for name, variable_args in zip(names, grouped_by_name):
--> 385     result_vars[name] = func(*variable_args)
    387 if signature.num_outputs > 1:
    388     return _unpack_dict_tuples(result_vars, signature.num_outputs)

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/xarray/core/computation.py:727, in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
    722     if vectorize:
    723         func = _vectorize(
    724             func, signature, output_dtypes=output_dtypes, exclude_dims=exclude_dims
    725         )
--> 727 result_data = func(*input_data)
    729 if signature.num_outputs == 1:
    730     result_data = (result_data,)

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/xarray.py:311, in xarray_reduce.<locals>.wrapper(array, func, skipna, *by, **kwargs)
    308         offset = min(array)
    309         array = datetime_to_numeric(array, offset, datetime_unit="us")
--> 311 result, *groups = groupby_reduce(array, *by, func=func, **kwargs)
    313 if requires_numeric:
    314     if is_npdatetime:

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/core.py:1573, in groupby_reduce(array, func, expected_groups, sort, isbin, axis, fill_value, min_count, split_out, method, engine, reindex, finalize_kwargs, *by)
   1570 agg = _initialize_aggregation(func, array.dtype, fill_value, min_count, finalize_kwargs)
   1572 if not has_dask:
-> 1573     results = _reduce_blockwise(
   1574         array, by, agg, expected_groups=expected_groups, reindex=reindex, sort=sort, **kwargs
   1575     )
   1576     groups = (results["groups"],)
   1577     result = results[agg.name]

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/core.py:1020, in _reduce_blockwise(array, by, agg, axis, expected_groups, fill_value, engine, sort, reindex)
   1017     finalize_kwargs = (finalize_kwargs,)
   1018 finalize_kwargs = finalize_kwargs + ({},) + ({},)
-> 1020 results = chunk_reduce(
   1021     array,
   1022     by,
   1023     func=agg.numpy,
   1024     axis=axis,
   1025     expected_groups=expected_groups,
   1026     # This fill_value should only apply to groups that only contain NaN observations
   1027     # BUT there is funkiness when axis is a subset of all possible values
   1028     # (see below)
   1029     fill_value=agg.fill_value["numpy"],
   1030     dtype=agg.dtype["numpy"],
   1031     kwargs=finalize_kwargs,
   1032     engine=engine,
   1033     sort=sort,
   1034     reindex=reindex,
   1035 )  # type: ignore
   1037 if _is_arg_reduction(agg):
   1038     results["intermediates"][0] = np.unravel_index(results["intermediates"][0], array.shape)[-1]

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/core.py:689, in chunk_reduce(array, by, func, expected_groups, axis, fill_value, dtype, reindex, engine, kwargs, sort)
    687     result = reduction(group_idx, array, **kwargs)
    688 else:
--> 689     result = generic_aggregate(
    690         group_idx, array, axis=-1, engine=engine, func=reduction, **kwargs
    691     ).astype(dt, copy=False)
    692 if np.any(props.nanmask):
    693     # remove NaN group label which should be last
    694     result = result[..., :-1]

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/aggregations.py:49, in generic_aggregate(group_idx, array, engine, func, axis, size, fill_value, dtype, **kwargs)
     44 else:
     45     raise ValueError(
     46         f"Expected engine to be one of ['flox', 'numpy', 'numba']. Received {engine} instead."
     47     )
---> 49 return method(
     50     group_idx, array, axis=axis, size=size, fill_value=fill_value, dtype=dtype, **kwargs
     51 )

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/aggregate_npg.py:72, in _len(group_idx, array, engine, func, axis, size, fill_value, dtype)
     71 def _len(group_idx, array, engine, *, func, axis=-1, size=None, fill_value=None, dtype=None):
---> 72     result = _get_aggregate(engine).aggregate(
     73         group_idx,
     74         array,
     75         axis=axis,
     76         func=func,
     77         size=size,
     78         fill_value=0,
     79         dtype=np.int64,
     80     )
     81     if fill_value is not None:
     82         result = result.astype(np.array([fill_value]).dtype)

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/numpy_groupies/aggregate_numpy.py:307, in aggregate(group_idx, a, func, size, fill_value, order, dtype, axis, **kwargs)
    305 def aggregate(group_idx, a, func='sum', size=None, fill_value=0, order='C',
    306               dtype=None, axis=None, **kwargs):
--> 307     return _aggregate_base(group_idx, a, size=size, fill_value=fill_value,
    308                            order=order, dtype=dtype, func=func, axis=axis,
    309                            _impl_dict=_impl_dict, **kwargs)

File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/numpy_groupies/aggregate_numpy.py:283, in _aggregate_base(group_idx, a, func, size, fill_value, order, dtype, axis, _impl_dict, **kwargs)
    281     kwargs['_nansqueeze'] = True
    282 else:
--> 283     good = ~np.isnan(a)
    284     a = a[good]
    285     group_idx = group_idx[good]

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions