Skip to content

Commit 2831063

Browse files
Merge pull request #2 from edponce/ARROW-12959-R-Option-for-is-nullNaN-to-evaluate-to-t
ARROW-12959: [C++] Option for is_null(NaN) to evaluate to true
2 parents 2a8f739 + 4ad9337 commit 2831063

File tree

5 files changed

+44
-45
lines changed

5 files changed

+44
-45
lines changed

cpp/src/arrow/compute/kernels/scalar_validity.cc

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,6 @@ namespace compute {
3232
namespace internal {
3333
namespace {
3434

35-
template <typename T, typename R = T>
36-
using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, R>;
37-
3835
struct IsValidOperator {
3936
static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
4037
checked_cast<BooleanScalar*>(out)->value = in.is_valid;
@@ -78,9 +75,11 @@ struct IsInfOperator {
7875
}
7976
};
8077

78+
using NanNullState = OptionsWrapper<NanNullOptions>;
79+
8180
struct IsNullOperator {
8281
static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
83-
const auto options = OptionsWrapper<NanNullOptions>::Get(ctx);
82+
const auto options = NanNullState::Get(ctx);
8483
bool* out_value = &checked_cast<BooleanScalar*>(out)->value;
8584
if (in.is_valid) {
8685
if (is_floating(in.type->id())) {
@@ -98,7 +97,6 @@ struct IsNullOperator {
9897
}
9998
} else {
10099
*out_value = false;
101-
102100
}
103101
} else {
104102
*out_value = true;
@@ -108,8 +106,7 @@ struct IsNullOperator {
108106
}
109107

110108
template <typename T>
111-
static enable_if_floating_point<T, void> SetNanBits(const ArrayData& arr,
112-
ArrayData* out) {
109+
static void SetNanBits(const ArrayData& arr, ArrayData* out) {
113110
const T* data = arr.GetValues<T>(1);
114111
for (int64_t i = 0; i < arr.length; ++i) {
115112
if (std::isnan(data[i])) {
@@ -130,7 +127,7 @@ struct IsNullOperator {
130127
}
131128

132129
if (is_floating(arr.type->id())) {
133-
const auto options = OptionsWrapper<NanNullOptions>::Get(ctx);
130+
const auto options = NanNullState::Get(ctx);
134131
if (options.nan_is_null) {
135132
switch (arr.type->id()) {
136133
case Type::FLOAT:
@@ -260,8 +257,8 @@ const FunctionDoc is_inf_doc(
260257
{"values"});
261258

262259
const FunctionDoc is_null_doc(
263-
"Return true if null, NaN can be considered as null",
264-
("For each input value, emit true iff the value is null. Default behavior is to emit "
260+
"Return true if null, NaN values can be considered as null",
261+
("For each input value, emit true if the value is null. Default behavior is to emit "
265262
"false for NaN values. True can be emitted for NaN values by toggling "
266263
"NanNullOptions flag."),
267264
{"values"}, "NanNullOptions");
@@ -279,8 +276,7 @@ void RegisterScalarValidity(FunctionRegistry* registry) {
279276

280277
MakeFunction("is_null", &is_null_doc, {ValueDescr::ANY}, boolean(), IsNullExec,
281278
registry, MemAllocation::PREALLOCATE,
282-
/*can_write_into_slices=*/true, &kNanNullOptions,
283-
OptionsWrapper<NanNullOptions>::Init);
279+
/*can_write_into_slices=*/true, &kNanNullOptions, NanNullState::Init);
284280

285281
DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc)));
286282
DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc)));

docs/source/cpp/compute.rst

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -899,33 +899,33 @@ Structural transforms
899899

900900
.. XXX (this category is a bit of a hodgepodge)
901901
902-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
903-
| Function name | Arity | Input types | Output type | Notes |
904-
+==========================+============+===================================================+=====================+=========+
905-
| case_when | Varargs | Struct of Boolean (Arg 0), Any fixed-width (rest) | Input type | \(1) |
906-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
907-
| choose | Varargs | Integral (Arg 0); Fixed-width/Binary-like (rest) | Input type | \(2) |
908-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
909-
| coalesce | Varargs | Any | Input type | \(3) |
910-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
911-
| fill_null | Binary | Boolean, Null, Numeric, Temporal, String-like | Input type | \(4) |
912-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
913-
| if_else | Ternary | Boolean, Null, Numeric, Temporal | Input type | \(5) |
914-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
915-
| is_finite | Unary | Float, Double | Boolean | \(6) |
916-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
917-
| is_inf | Unary | Float, Double | Boolean | \(7) |
918-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
919-
| is_nan | Unary | Float, Double | Boolean | \(8) |
920-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
921-
| is_null | Unary | Any | Boolean | \(9) |
922-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
923-
| is_valid | Unary | Any | Boolean | \(10) |
924-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
925-
| list_value_length | Unary | List-like | Int32 or Int64 | \(11) |
926-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
927-
| make_struct | Varargs | Any | Struct | \(12) |
928-
+--------------------------+------------+---------------------------------------------------+---------------------+---------+
902+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
903+
| Function name | Arity | Input types | Output type | Options class | Notes |
904+
+===================+=========+===================================================+================+=============================+=======+
905+
| case_when | Varargs | Struct of Boolean (Arg 0), Any fixed-width (rest) | Input type | | \(1) |
906+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
907+
| choose | Varargs | Integral (Arg 0); Fixed-width/Binary-like (rest) | Input type | | \(2) |
908+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
909+
| coalesce | Varargs | Any | Input type | | \(3) |
910+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
911+
| fill_null | Binary | Boolean, Null, Numeric, Temporal, String-like | Input type | | \(4) |
912+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
913+
| if_else | Ternary | Boolean, Null, Numeric, Temporal | Input type | | \(5) |
914+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
915+
| is_finite | Unary | Float, Double | Boolean | | \(6) |
916+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
917+
| is_inf | Unary | Float, Double | Boolean | | \(7) |
918+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
919+
| is_nan | Unary | Float, Double | Boolean | | \(8) |
920+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
921+
| is_null | Unary | Any | Boolean | :struct:`NanNullOptions` | \(9) |
922+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
923+
| is_valid | Unary | Any | Boolean | | \(10) |
924+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
925+
| list_value_length | Unary | List-like | Int32 or Int64 | | \(11) |
926+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
927+
| make_struct | Varargs | Any | Struct | :struct:`MakeStructOptions` | \(12) |
928+
+-------------------+---------+---------------------------------------------------+----------------+-----------------------------+-------+
929929

930930
* \(1) This function acts like a SQL 'case when' statement or switch-case. The
931931
input is a "condition" value, which is a struct of Booleans, followed by the
@@ -966,7 +966,8 @@ Structural transforms
966966

967967
* \(8) Output is true iff the corresponding input element is NaN.
968968

969-
* \(9) Output is true iff the corresponding input element is null.
969+
* \(9) Output is true if the corresponding input element is null, or if it is
970+
NaN and ``nan_is_null`` is enabled in :struct:`NanNullOptions`.
970971

971972
* \(10) Output is true iff the corresponding input element is non-null.
972973

python/pyarrow/_dataset.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ cdef class Expression(_Weakrefable):
238238
"""Checks whether the expression is null"""
239239
cdef:
240240
shared_ptr[CFunctionOptions] c_options
241-
241+
242242
c_options.reset(new CNanNullOptions(nan_is_null))
243243
return Expression._call("is_null", [self], c_options)
244244

python/pyarrow/array.pxi

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,11 +1039,12 @@ cdef class Array(_PandasConvertible):
10391039
else:
10401040
return 0
10411041

1042-
def is_null(self):
1042+
def is_null(self, nan_is_null=False):
10431043
"""
10441044
Return BooleanArray indicating the null values (and also the NaN values when nan_is_null is True).
10451045
"""
1046-
return _pc().is_null(self)
1046+
options = _pc().NanNullOptions(nan_is_null)
1047+
return _pc().call_function('is_null', [self], options)
10471048

10481049
def is_valid(self):
10491050
"""

python/pyarrow/table.pxi

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,12 @@ cdef class ChunkedArray(_PandasConvertible):
170170
else:
171171
index -= self.chunked_array.chunk(j).get().length()
172172

173-
def is_null(self):
173+
def is_null(self, nan_is_null=False):
174174
"""
175175
Return BooleanArray indicating the null values (and also the NaN values when nan_is_null is True).
176176
"""
177-
return _pc().is_null(self)
177+
options = _pc().NanNullOptions(nan_is_null)
178+
return _pc().call_function('is_null', [self], options)
178179

179180
def is_valid(self):
180181
"""

0 commit comments

Comments
 (0)