Closed
Description
It seems that PR #132, by changing the default engine from flox
to numpy
reactivated the bug of #101 : `func='count`` doesn't work on arrays of dtype 'O' with engine 'numpy'. Which means cftime arrays in most cases.
As we can't change the default engine that xarray uses (I think?), this means that this precise case is broken in xarray too.
Could we simply remove count
from the line here?
Line 299 in b21b040
Sorry, I should add an example!
import xarray as xr
from flox.xarray import xarray_reduce
t = xr.DataArray(xr.cftime_range('1900-01-01', periods=10, freq='D'), dims=('time',))
xarray_reduce(t, t.notnull().astype(int).rename('dummy'), func='count', engine='numpy')
Raises:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [20], in <cell line: 1>()
----> 1 xarray_reduce(t, t.notnull().astype(int).rename('dummy'), func='count', engine='numpy')
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/xarray.py:335, in xarray_reduce(obj, func, expected_groups, isbin, sort, dim, split_out, fill_value, method, engine, keep_attrs, skipna, min_count, reindex, *by, **finalize_kwargs)
332 input_core_dims = _get_input_core_dims(group_names, dim, ds, grouper_dims)
333 input_core_dims += [input_core_dims[-1]] * (len(by) - 1)
--> 335 actual = xr.apply_ufunc(
336 wrapper,
337 ds.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims),
338 *by,
339 input_core_dims=input_core_dims,
340 # for xarray's test_groupby_duplicate_coordinate_labels
341 exclude_dims=set(dim),
342 output_core_dims=[group_names],
343 dask="allowed",
344 dask_gufunc_kwargs=dict(output_sizes=group_sizes),
345 keep_attrs=keep_attrs,
346 kwargs={
347 "func": func,
348 "axis": axis,
349 "sort": sort,
350 "split_out": split_out,
351 "fill_value": fill_value,
352 "method": method,
353 "min_count": min_count,
354 "skipna": skipna,
355 "engine": engine,
356 "reindex": reindex,
357 "expected_groups": tuple(expected_groups),
358 "isbin": isbin,
359 "finalize_kwargs": finalize_kwargs,
360 },
361 )
363 # restore non-dim coord variables without the core dimension
364 # TODO: shouldn't apply_ufunc handle this?
365 for var in set(ds.variables) - set(ds.dims):
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/xarray/core/computation.py:1147, in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, *args)
1145 # feed datasets apply_variable_ufunc through apply_dataset_vfunc
1146 elif any(is_dict_like(a) for a in args):
-> 1147 return apply_dataset_vfunc(
1148 variables_vfunc,
1149 *args,
1150 signature=signature,
1151 join=join,
1152 exclude_dims=exclude_dims,
1153 dataset_join=dataset_join,
1154 fill_value=dataset_fill_value,
1155 keep_attrs=keep_attrs,
1156 )
1157 # feed DataArray apply_variable_ufunc through apply_dataarray_vfunc
1158 elif any(isinstance(a, DataArray) for a in args):
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/xarray/core/computation.py:441, in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, *args)
436 list_of_coords = build_output_coords(
437 args, signature, exclude_dims, combine_attrs=keep_attrs
438 )
439 args = [getattr(arg, "data_vars", arg) for arg in args]
--> 441 result_vars = apply_dict_of_variables_vfunc(
442 func, *args, signature=signature, join=dataset_join, fill_value=fill_value
443 )
445 if signature.num_outputs > 1:
446 out = tuple(_fast_dataset(*args) for args in zip(result_vars, list_of_coords))
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/xarray/core/computation.py:385, in apply_dict_of_variables_vfunc(func, signature, join, fill_value, *args)
383 result_vars = {}
384 for name, variable_args in zip(names, grouped_by_name):
--> 385 result_vars[name] = func(*variable_args)
387 if signature.num_outputs > 1:
388 return _unpack_dict_tuples(result_vars, signature.num_outputs)
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/xarray/core/computation.py:727, in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
722 if vectorize:
723 func = _vectorize(
724 func, signature, output_dtypes=output_dtypes, exclude_dims=exclude_dims
725 )
--> 727 result_data = func(*input_data)
729 if signature.num_outputs == 1:
730 result_data = (result_data,)
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/xarray.py:311, in xarray_reduce.<locals>.wrapper(array, func, skipna, *by, **kwargs)
308 offset = min(array)
309 array = datetime_to_numeric(array, offset, datetime_unit="us")
--> 311 result, *groups = groupby_reduce(array, *by, func=func, **kwargs)
313 if requires_numeric:
314 if is_npdatetime:
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/core.py:1573, in groupby_reduce(array, func, expected_groups, sort, isbin, axis, fill_value, min_count, split_out, method, engine, reindex, finalize_kwargs, *by)
1570 agg = _initialize_aggregation(func, array.dtype, fill_value, min_count, finalize_kwargs)
1572 if not has_dask:
-> 1573 results = _reduce_blockwise(
1574 array, by, agg, expected_groups=expected_groups, reindex=reindex, sort=sort, **kwargs
1575 )
1576 groups = (results["groups"],)
1577 result = results[agg.name]
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/core.py:1020, in _reduce_blockwise(array, by, agg, axis, expected_groups, fill_value, engine, sort, reindex)
1017 finalize_kwargs = (finalize_kwargs,)
1018 finalize_kwargs = finalize_kwargs + ({},) + ({},)
-> 1020 results = chunk_reduce(
1021 array,
1022 by,
1023 func=agg.numpy,
1024 axis=axis,
1025 expected_groups=expected_groups,
1026 # This fill_value should only apply to groups that only contain NaN observations
1027 # BUT there is funkiness when axis is a subset of all possible values
1028 # (see below)
1029 fill_value=agg.fill_value["numpy"],
1030 dtype=agg.dtype["numpy"],
1031 kwargs=finalize_kwargs,
1032 engine=engine,
1033 sort=sort,
1034 reindex=reindex,
1035 ) # type: ignore
1037 if _is_arg_reduction(agg):
1038 results["intermediates"][0] = np.unravel_index(results["intermediates"][0], array.shape)[-1]
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/core.py:689, in chunk_reduce(array, by, func, expected_groups, axis, fill_value, dtype, reindex, engine, kwargs, sort)
687 result = reduction(group_idx, array, **kwargs)
688 else:
--> 689 result = generic_aggregate(
690 group_idx, array, axis=-1, engine=engine, func=reduction, **kwargs
691 ).astype(dt, copy=False)
692 if np.any(props.nanmask):
693 # remove NaN group label which should be last
694 result = result[..., :-1]
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/aggregations.py:49, in generic_aggregate(group_idx, array, engine, func, axis, size, fill_value, dtype, **kwargs)
44 else:
45 raise ValueError(
46 f"Expected engine to be one of ['flox', 'numpy', 'numba']. Received {engine} instead."
47 )
---> 49 return method(
50 group_idx, array, axis=axis, size=size, fill_value=fill_value, dtype=dtype, **kwargs
51 )
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/flox/aggregate_npg.py:72, in _len(group_idx, array, engine, func, axis, size, fill_value, dtype)
71 def _len(group_idx, array, engine, *, func, axis=-1, size=None, fill_value=None, dtype=None):
---> 72 result = _get_aggregate(engine).aggregate(
73 group_idx,
74 array,
75 axis=axis,
76 func=func,
77 size=size,
78 fill_value=0,
79 dtype=np.int64,
80 )
81 if fill_value is not None:
82 result = result.astype(np.array([fill_value]).dtype)
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/numpy_groupies/aggregate_numpy.py:307, in aggregate(group_idx, a, func, size, fill_value, order, dtype, axis, **kwargs)
305 def aggregate(group_idx, a, func='sum', size=None, fill_value=0, order='C',
306 dtype=None, axis=None, **kwargs):
--> 307 return _aggregate_base(group_idx, a, size=size, fill_value=fill_value,
308 order=order, dtype=dtype, func=func, axis=axis,
309 _impl_dict=_impl_dict, **kwargs)
File ~/mambaforge/envs/xclim/lib/python3.10/site-packages/numpy_groupies/aggregate_numpy.py:283, in _aggregate_base(group_idx, a, func, size, fill_value, order, dtype, axis, _impl_dict, **kwargs)
281 kwargs['_nansqueeze'] = True
282 else:
--> 283 good = ~np.isnan(a)
284 a = a[good]
285 group_idx = group_idx[good]
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
Metadata
Metadata
Assignees
Labels
No labels