Open
Description
In dask/dask#9483, Dask gained implementations of `median` and `median_approximate`. These should be available in dask_cudf. cuDF currently raises a NotImplementedError for `median(axis=1)`:
cdf = cudf.datasets.timeseries()
cdf = dd.from_pandas(cdf, npartitions=2)
cdf.median(axis=1).compute()
File /datasets/bzaitlen/miniconda3/envs/tpcds-20220906/lib/python3.9/site-packages/cudf/core/dataframe.py:5225, in DataFrame.quantile(self, q, axis, numeric_only, interpolation, columns, exact)
5164 """
5165 Return values at the given quantile.
5166
(...)
5222 0.5 2.5 55.0
5223 """ # noqa: E501
5224 if axis not in (0, None):
-> 5225 raise NotImplementedError("axis is not implemented yet")
5227 data_df = self
5228 if numeric_only:
As for `median_approximate`, a ValueError is raised unexpectedly:
In [32]: cdf.compute()
Out[32]:
x y
0 1 1.1
1 2 2.2
2 3 3.3
3 4 4.4
4 5 5.5
In [33]: cdf.median_approximate().compute()
...
File /datasets/bzaitlen/miniconda3/envs/tpcds-20220906/lib/python3.9/site-packages/cudf/core/frame.py:898, in Frame.fillna(self, value, method, axis, inplace, limit)
892 should_fill = (
893 col_name in value
894 and col.contains_na_entries
895 and not libcudf.scalar._is_null_host_scalar(replace_val)
896 ) or method is not None
897 if should_fill:
--> 898 filled_data[col_name] = col.fillna(replace_val, method)
899 else:
900 filled_data[col_name] = col.copy(deep=True)
File /datasets/bzaitlen/miniconda3/envs/tpcds-20220906/lib/python3.9/site-packages/cudf/core/column/numerical.py:503, in NumericalColumn.fillna(self, fill_value, method, dtype, fill_nan)
499 return super(NumericalColumn, col).fillna(fill_value, method)
501 if np.isscalar(fill_value):
502 # cast safely to the same dtype as self
--> 503 fill_value_casted = col.dtype.type(fill_value)
504 if not np.isnan(fill_value) and (fill_value_casted != fill_value):
505 raise TypeError(
506 f"Cannot safely cast non-equivalent "
507 f"{type(fill_value).__name__} to {col.dtype.name}"
508 )
ValueError: cannot convert float NaN to integer