From 4296def72d31c9c1e8afa7c7250bd94b0f35948a Mon Sep 17 00:00:00 2001 From: Jeremy Maitin-Shepard Date: Tue, 11 Jul 2023 14:28:25 -0700 Subject: [PATCH] Add "identical" array comparison operation PiperOrigin-RevId: 547298221 Change-Id: I699fa3da5afebfedd6102aa3c990331578072afe --- tensorstore/array.cc | 11 ++++ tensorstore/array.h | 12 +++- tensorstore/array_test.cc | 44 +++++++++++++ tensorstore/data_type.h | 66 ++++++++++++++++--- tensorstore/internal/async_write_array.cc | 18 +++-- tensorstore/internal/async_write_array.h | 5 ++ .../internal/async_write_array_test.cc | 41 ++++++++++++ 7 files changed, 181 insertions(+), 16 deletions(-) diff --git a/tensorstore/array.cc b/tensorstore/array.cc index 4ffddbcd8..c61acb925 100644 --- a/tensorstore/array.cc +++ b/tensorstore/array.cc @@ -324,6 +324,17 @@ bool AreArraysSameValueEqual(const OffsetArrayView& a, .success; } +bool AreArraysIdenticallyEqual(const OffsetArrayView& a, + const OffsetArrayView& b) { + if (a.dtype() != b.dtype()) return false; + if (a.domain() != b.domain()) return false; + return internal::IterateOverArrays({&a.dtype()->compare_identical, nullptr}, + /*status=*/nullptr, + /*constraints=*/skip_repeated_elements, a, + b) + .success; +} + namespace internal_array { bool EncodeArray(serialization::EncodeSink& sink, diff --git a/tensorstore/array.h b/tensorstore/array.h index 3feda0c46..4dff5c45d 100644 --- a/tensorstore/array.h +++ b/tensorstore/array.h @@ -1898,8 +1898,9 @@ std::string ToString( /// ``operator==`` in that negative zero is not equal to positive zero, and /// NaN is equal to NaN. /// -/// Note that this differs from bit equality, because there are multiple bit -/// representations of NaN, and this functions treats all of them as equal. +/// Note that this differs from bit equality (`AreArraysIdentical`), because +/// there are multiple bit representations of NaN, and this functions treats all +/// of them as equal. /// /// Checks that the data types, domains, and content are equal. /// @@ -1907,6 +1908,13 @@ std::string ToString( bool AreArraysSameValueEqual(const OffsetArrayView& a, const OffsetArrayView& b); +/// Compares two arrays for "identical" equality. +/// +/// This differs from normal equality in that floating point values are compared +/// bitwise. +bool AreArraysIdenticallyEqual(const OffsetArrayView& a, + const OffsetArrayView& b); + /// Validates that `source_shape` can be broadcast to `target_shape`. /// /// A `source_shape` can be broadcast to a `target_shape` if, starting from the diff --git a/tensorstore/array_test.cc b/tensorstore/array_test.cc index 739201574..acde0ad92 100644 --- a/tensorstore/array_test.cc +++ b/tensorstore/array_test.cc @@ -73,6 +73,7 @@ using ::tensorstore::MakeArray; using ::tensorstore::MakeArrayView; using ::tensorstore::MakeCopy; using ::tensorstore::MakeOffsetArray; +using ::tensorstore::MakeScalarArray; using ::tensorstore::MakeScalarArrayView; using ::tensorstore::MatchesStatus; using ::tensorstore::offset_origin; @@ -995,6 +996,15 @@ TEST(ArrayTest, Compare) { EXPECT_TRUE(ArrayView(MakeScalarArrayView(1.0)) != MakeScalarArrayView(1)); EXPECT_TRUE(MakeArrayView({1}) != MakeArrayView({1, 2})); + + EXPECT_FALSE( + MakeScalarArray(std::numeric_limits::quiet_NaN()) == + MakeScalarArray(std::numeric_limits::quiet_NaN())); + + EXPECT_FALSE(MakeScalarArray>( + std::numeric_limits::quiet_NaN()) == + MakeScalarArray>( + std::numeric_limits::quiet_NaN())); } TEST(ArrayTest, SameValue) { @@ -1009,6 +1019,40 @@ TEST(ArrayTest, SameValue) { EXPECT_FALSE(AreArraysSameValueEqual( MakeArrayView({{NAN, 2, +0.0}, {4, 5, 6}}), MakeArrayView({{NAN, 2, -0.0}, {4, 5, 6}}))); + + EXPECT_TRUE(AreArraysSameValueEqual( + MakeScalarArray(std::numeric_limits::quiet_NaN()), + MakeScalarArray(std::numeric_limits::signaling_NaN()))); + + EXPECT_TRUE(AreArraysSameValueEqual( + MakeScalarArray>( + std::numeric_limits::quiet_NaN()), + MakeScalarArray>( + std::numeric_limits::signaling_NaN()))); +} + +TEST(ArrayTest, Identical) { + EXPECT_TRUE( + AreArraysIdenticallyEqual(MakeArrayView({{1, 2, 3}, {4, 5, 6}}), + MakeArrayView({{1, 2, 3}, {4, 5, 6}}))); + + EXPECT_TRUE(AreArraysIdenticallyEqual( + MakeArrayView({{NAN, 2, 3}, {4, 5, 6}}), + MakeArrayView({{NAN, 2, 3}, {4, 5, 6}}))); + + EXPECT_FALSE(AreArraysIdenticallyEqual( + MakeArrayView({{NAN, 2, +0.0}, {4, 5, 6}}), + MakeArrayView({{NAN, 2, -0.0}, {4, 5, 6}}))); + + EXPECT_FALSE(AreArraysIdenticallyEqual( + MakeScalarArray(std::numeric_limits::quiet_NaN()), + MakeScalarArray(std::numeric_limits::signaling_NaN()))); + + EXPECT_FALSE(AreArraysIdenticallyEqual( + MakeScalarArray>( + std::numeric_limits::quiet_NaN()), + MakeScalarArray>( + std::numeric_limits::signaling_NaN()))); } TEST(CopyArrayTest, ZeroOrigin) { diff --git a/tensorstore/data_type.h b/tensorstore/data_type.h index d2a67fa91..2b936296b 100644 --- a/tensorstore/data_type.h +++ b/tensorstore/data_type.h @@ -506,19 +506,31 @@ struct DataTypeOperations { using AppendToStringFunction = void (*)(std::string* result, const void* ptr); AppendToStringFunction append_to_string; - /// Compares two strided arrays for equality. + /// Compares two arrays for equality. + /// + /// This uses regular equality, which for floating point types considers + /// positive and negative zero equal, and NaN unequal to itself. using CompareEqualFunction = ElementwiseFunction<2, absl::Status*>; CompareEqualFunction compare_equal; - /// Compares two strided arrays for equality, taking into account negative + /// Compares two arrays for equality, taking into account negative /// zero and NaN for floating point types (negative zero is not equal to /// positive zero, and NaN is equal to NaN). /// - /// Note that this not the same as bit equality, because there are multiple - /// possible bit representations of NaN, and this function considers all of - /// them to be equal. + /// For integer types this is equivalent to `compare_equal`. + /// + /// Note that this not the same as `compare_identical`, because there are + /// multiple possible bit representations of NaN, and this function considers + /// all of them to be equal. CompareEqualFunction compare_same_value; + /// Checks if two arrays are identical. + /// + /// For integer and floating point types, this performs a bitwise comparison. + /// + /// For integer types this is equivalent to `compare_equal`. + CompareEqualFunction compare_identical; + struct CanonicalConversionOperations { // Function for converting to/from canonical data type. using ConvertFunction = ElementwiseFunction<2, absl::Status*>; @@ -621,6 +633,11 @@ class DataType { return operations_->compare_same_value; } + constexpr const Ops::CompareEqualFunction& compare_identical_function() + const { + return operations_->compare_identical; + } + constexpr const Ops::CopyAssignFunction& copy_assign_function() const { return operations_->copy_assign; } @@ -708,8 +725,9 @@ bool CompareEqual(const T& a, const T& b) { /// For floating point types, this differs from normal `operator==` in that /// negative zero is not equal to positive zero, and NaN is equal to NaN. /// -/// Note that this differs from bit equality, because there are multiple bit -/// representations of NaN, and this functions treats all of them as equal. +/// Note that this differs from bit equality (`CompareIdentical`), because there +/// are multiple bit representations of NaN, and this functions treats all of +/// them as equal. template bool CompareSameValue(const T& a, const T& b) { if constexpr (internal::IsEqualityComparable) { @@ -718,6 +736,17 @@ bool CompareSameValue(const T& a, const T& b) { return false; } +/// Checks if two values are identical (indistinguishable). +/// +/// For floating point types, this does a bitwise comparison. +template +bool CompareIdentical(const T& a, const T& b) { + if constexpr (internal::IsEqualityComparable) { + return a == b; + } + return false; +} + #define TENSORSTORE_INTERNAL_DO_DEFINE_COMPARE_SAME_VALUE_FLOAT(T, ...) \ template <> \ inline bool CompareSameValue(const T& a, const T& b) { \ @@ -726,6 +755,11 @@ bool CompareSameValue(const T& a, const T& b) { using Int = internal::uint_t; \ return internal::bit_cast(a) == internal::bit_cast(b); \ } \ + template <> \ + inline bool CompareIdentical(const T& a, const T& b) { \ + using Int = internal::uint_t; \ + return internal::bit_cast(a) == internal::bit_cast(b); \ + } \ /**/ TENSORSTORE_FOR_EACH_FLOAT_DATA_TYPE( TENSORSTORE_INTERNAL_DO_DEFINE_COMPARE_SAME_VALUE_FLOAT) @@ -737,6 +771,11 @@ TENSORSTORE_FOR_EACH_FLOAT_DATA_TYPE( return CompareSameValue(a.real(), b.real()) && \ CompareSameValue(a.imag(), b.imag()); \ } \ + template <> \ + inline bool CompareIdentical(const T& a, const T& b) { \ + return CompareIdentical(a.real(), b.real()) && \ + CompareIdentical(a.imag(), b.imag()); \ + } \ /**/ TENSORSTORE_FOR_EACH_COMPLEX_DATA_TYPE( TENSORSTORE_INTERNAL_DO_DEFINE_COMPARE_SAME_VALUE_COMPLEX) @@ -801,6 +840,12 @@ struct DataTypeElementwiseOperationsImpl { } }; + struct CompareIdenticalImpl { + bool operator()(const T* source, const T* dest, absl::Status*) const { + return internal_data_type::CompareIdentical(*source, *dest); + } + }; + using Initialize = internal::SimpleElementwiseFunction; @@ -817,6 +862,9 @@ struct DataTypeElementwiseOperationsImpl { absl::Status*>; using CompareSameValue = internal::SimpleElementwiseFunction< CompareSameValueImpl(const T, const T), absl::Status*>; + + using CompareIdentical = internal::SimpleElementwiseFunction< + CompareIdenticalImpl(const T, const T), absl::Status*>; }; template @@ -839,8 +887,10 @@ constexpr internal::DataTypeOperations DataTypeOperationsImpl = { /*.append_to_string=*/&DataTypeSimpleOperationsImpl::AppendToString, /*.compare_equal=*/ typename DataTypeElementwiseOperationsImpl::CompareEqual(), - /*.compare_equal=*/ + /*.compare_same_value=*/ typename DataTypeElementwiseOperationsImpl::CompareSameValue(), + /*.compare_identical=*/ + typename DataTypeElementwiseOperationsImpl::CompareIdentical(), /*.canonical_conversion=*/nullptr, }; diff --git a/tensorstore/internal/async_write_array.cc b/tensorstore/internal/async_write_array.cc index 39bf2a20f..17e5d6e9b 100644 --- a/tensorstore/internal/async_write_array.cc +++ b/tensorstore/internal/async_write_array.cc @@ -98,6 +98,16 @@ AsyncWriteArray::MaskedArray::GetArrayForWriteback( bool read_state_already_integrated) { assert(origin.size() == spec.rank()); WritebackData writeback; + + const auto must_store = [&](ArrayView array) { + if (spec.store_if_equal_to_fill_value) return true; + if (spec.compare_to_fill_value_using_identical_equality) { + return !AreArraysIdenticallyEqual(array, spec.fill_value); + } else { + return !AreArraysSameValueEqual(array, spec.fill_value); + } + }; + if (!data) { // No data has been allocated for the write array. This is only possible in // two cases: @@ -110,9 +120,7 @@ AsyncWriteArray::MaskedArray::GetArrayForWriteback( // Case 2: array is unmodified. assert(IsUnmodified()); if (read_array.data()) { - writeback.must_store = - spec.store_if_equal_to_fill_value || - !AreArraysSameValueEqual(read_array, spec.fill_value); + writeback.must_store = must_store(read_array); if (!writeback.must_store) { writeback.array = spec.fill_value; } else { @@ -144,9 +152,7 @@ AsyncWriteArray::MaskedArray::GetArrayForWriteback( } writeback.array = SharedArrayView( SharedElementPointer(data, spec.dtype()), spec.write_layout()); - writeback.must_store = - spec.store_if_equal_to_fill_value || - !AreArraysSameValueEqual(writeback.array, spec.fill_value); + writeback.must_store = must_store(writeback.array); if (!writeback.must_store) { data = nullptr; writeback.array = spec.fill_value; diff --git a/tensorstore/internal/async_write_array.h b/tensorstore/internal/async_write_array.h index 4c98c69b1..e917c0bb1 100644 --- a/tensorstore/internal/async_write_array.h +++ b/tensorstore/internal/async_write_array.h @@ -88,6 +88,11 @@ struct AsyncWriteArray { /// call to `WriteFillValue`, then it won't be stored. bool store_if_equal_to_fill_value = false; + /// If `true`, compare to fill value using `AreArraysIdenticallyEqual`. If + /// `false`, compare to fill value using `AreArraysSameValueEqual`. Only + /// has an effect if `store_if_equal_to_fill_value == false`. + bool compare_to_fill_value_using_identical_equality = false; + /// Returns the shape of the array. span shape() const { return fill_value.shape(); } diff --git a/tensorstore/internal/async_write_array_test.cc b/tensorstore/internal/async_write_array_test.cc index 5ef947b7a..7bc47f90d 100644 --- a/tensorstore/internal/async_write_array_test.cc +++ b/tensorstore/internal/async_write_array_test.cc @@ -327,6 +327,47 @@ TEST(MaskedArrayTest, StoreIfEqualToFillValue) { } } +// Tests that `compare_to_fill_value_using_identical_equality==true` is +// correctly handled. +TEST(MaskedArrayTest, CompareFillValueIdenticallyEqual) { + auto fill_value = + MakeScalarArray(std::numeric_limits::quiet_NaN()); + tensorstore::Box<> component_bounds; + Spec spec(fill_value, component_bounds); + spec.compare_to_fill_value_using_identical_equality = true; + MaskedArray write_state(0); + // Fully overwrite the portion within `component_bounds`. + TestWrite(&write_state, spec, {}, + tensorstore::MakeScalarArray( + std::numeric_limits::signaling_NaN()), + /*expected_modified=*/true); + { + auto writeback_data = write_state.GetArrayForWriteback( + spec, /*origin=*/{}, /*read_array=*/{}, + /*read_state_already_integrated=*/false); + EXPECT_TRUE(AreArraysIdenticallyEqual( + tensorstore::MakeScalarArray( + std::numeric_limits::signaling_NaN()), + writeback_data.array)); + EXPECT_TRUE(writeback_data.must_store); + } + + TestWrite(&write_state, spec, {}, + tensorstore::MakeScalarArray( + std::numeric_limits::quiet_NaN()), + /*expected_modified=*/true); + { + auto writeback_data = write_state.GetArrayForWriteback( + spec, /*origin=*/{}, /*read_array=*/{}, + /*read_state_already_integrated=*/false); + EXPECT_TRUE( + AreArraysIdenticallyEqual(tensorstore::MakeScalarArray( + std::numeric_limits::quiet_NaN()), + writeback_data.array)); + EXPECT_FALSE(writeback_data.must_store); + } +} + TEST(AsyncWriteArrayTest, Basic) { AsyncWriteArray async_write_array(2); auto fill_value = MakeArray({{1, 2, 3}, {4, 5, 6}});