Skip to content

Commit

Permalink
Merge branch 'branch-24.06' into perf-contains
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed May 15, 2024
2 parents b70edcf + b5f6aa5 commit 24f3a4a
Show file tree
Hide file tree
Showing 42 changed files with 5,311 additions and 44 deletions.
8 changes: 5 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,11 @@ repos:
hooks:
- id: verify-copyright
exclude: |
(?x)
cpp/include/cudf_test/cxxopts[.]hpp$
(?x)^(
cpp/include/cudf_test/cxxopts[.]hpp$|
cpp/src/io/parquet/ipc/Message_generated[.]h$|
cpp/src/io/parquet/ipc/Schema_generated[.]h$
)
default_language_version:
python: python3
5 changes: 5 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ include(cmake/thirdparty/get_cccl.cmake)
include(cmake/thirdparty/get_rmm.cmake)
# find arrow
include(cmake/thirdparty/get_arrow.cmake)
# find flatbuffers
include(cmake/thirdparty/get_flatbuffers.cmake)
# find dlpack
include(cmake/thirdparty/get_dlpack.cmake)
# find cuCollections, should come after including CCCL
Expand Down Expand Up @@ -262,6 +264,7 @@ add_library(
src/binaryop/compiled/Mod.cu
src/binaryop/compiled/Mul.cu
src/binaryop/compiled/NullEquals.cu
src/binaryop/compiled/NullNotEquals.cu
src/binaryop/compiled/NullLogicalAnd.cu
src/binaryop/compiled/NullLogicalOr.cu
src/binaryop/compiled/NullMax.cu
Expand Down Expand Up @@ -429,6 +432,7 @@ add_library(
src/io/text/bgzip_utils.cpp
src/io/text/multibyte_split.cu
src/io/utilities/arrow_io_source.cpp
src/io/utilities/base64_utilities.cpp
src/io/utilities/column_buffer.cpp
src/io/utilities/column_buffer_strings.cu
src/io/utilities/config_utils.cpp
Expand Down Expand Up @@ -742,6 +746,7 @@ target_include_directories(
"$<BUILD_INTERFACE:${CUDF_GENERATED_INCLUDE_DIR}/include>"
PRIVATE "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
"$<BUILD_INTERFACE:${nanoarrow_SOURCE_DIR}/src>"
"$<BUILD_INTERFACE:${FlatBuffers_SOURCE_DIR}/include>"
INTERFACE "$<INSTALL_INTERFACE:include>"
)

Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/binaryop/compiled_binaryop.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -111,5 +111,6 @@ BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool
BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool);
BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool);
BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool);
BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_NOT_EQUALS, bool);
BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32);
BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s);
33 changes: 33 additions & 0 deletions cpp/cmake/thirdparty/get_flatbuffers.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# =============================================================================
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Use CPM to find or clone flatbuffers
#
# VERSION: the flatbuffers version to look for; it is also used to form the git tag `v<VERSION>`
# that is fetched from the upstream repository.
function(find_and_configure_flatbuffers VERSION)

# Locate flatbuffers at the requested version. The CPM_ARGS name the upstream git repository and
# tag, and request a shallow clone.
rapids_cpm_find(
flatbuffers ${VERSION}
GLOBAL_TARGETS flatbuffers
CPM_ARGS
GIT_REPOSITORY https://github.com/google/flatbuffers.git
GIT_TAG v${VERSION}
GIT_SHALLOW TRUE
)

# Register the flatbuffers binary directory as a find_package root for the BUILD export of the
# cudf-exports set (presumably so build-tree consumers can locate this copy -- confirm against the
# rapids-cmake documentation).
rapids_export_find_package_root(
BUILD flatbuffers "${flatbuffers_BINARY_DIR}" EXPORT_SET cudf-exports
)

endfunction()

find_and_configure_flatbuffers(24.3.25)
2 changes: 2 additions & 0 deletions cpp/include/cudf/binaryop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ enum class binary_operator : int32_t {
GREATER_EQUAL, ///< operator >=
NULL_EQUALS, ///< Returns true when both operands are null; false when one is null; the
///< result of equality when both are non-null
NULL_NOT_EQUALS, ///< Returns false when both operands are null; true when one is null; the
///< result of inequality when both are non-null
NULL_MAX, ///< Returns max of operands when both are non-null; returns the non-null
///< operand when one is null; or invalid when both are null
NULL_MIN, ///< Returns min of operands when both are non-null; returns the non-null
Expand Down
28 changes: 28 additions & 0 deletions cpp/include/cudf/io/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ class parquet_reader_options {
bool _convert_strings_to_categories = false;
// Whether to use PANDAS metadata to load columns
bool _use_pandas_metadata = true;
// Whether to read and use ARROW schema
bool _use_arrow_schema = true;
// Cast timestamp columns to a specific type
data_type _timestamp_type{type_id::EMPTY};

Expand Down Expand Up @@ -126,6 +128,13 @@ class parquet_reader_options {
*/
[[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; }

/**
 * @brief Indicates whether the arrow schema is used while reading.
 *
 * @return `true` if arrow schema is used while reading
 */
[[nodiscard]] bool is_enabled_use_arrow_schema() const
{
  return _use_arrow_schema;
}

/**
* @brief Returns optional tree of metadata.
*
Expand Down Expand Up @@ -214,6 +223,13 @@ class parquet_reader_options {
*/
void enable_use_pandas_metadata(bool val) { _use_pandas_metadata = val; }

/**
 * @brief Enables or disables use of the arrow schema when reading.
 *
 * @param val `true` to use the arrow schema, `false` to not use it
 */
void enable_use_arrow_schema(bool val)
{
  _use_arrow_schema = val;
}

/**
* @brief Sets reader column schema.
*
Expand Down Expand Up @@ -328,6 +344,18 @@ class parquet_reader_options_builder {
return *this;
}

/**
 * @brief Enables or disables use of the arrow schema when reading.
 *
 * Stores the flag directly on the options object being built.
 *
 * @param val `true` to use the arrow schema, `false` to not use it
 * @return this for chaining
 */
parquet_reader_options_builder& use_arrow_schema(bool val)
{
  options._use_arrow_schema = val;
  return *this;
}

/**
* @brief Sets reader metadata.
*
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ std::pair<rmm::device_buffer, size_type> scalar_col_valid_mask_and(
*/
inline bool is_null_dependent(binary_operator op)
{
// NOTE(review): as rendered here the function contains two return statements, so only this
// first one can execute, and it does not test NULL_NOT_EQUALS. This looks like the pre-change
// line of the diff shown next to its replacement -- confirm that only the return below remains
// in the actual file.
return op == binary_operator::NULL_EQUALS || op == binary_operator::NULL_MIN ||
op == binary_operator::NULL_MAX || op == binary_operator::NULL_LOGICAL_AND ||
op == binary_operator::NULL_LOGICAL_OR;
// Same set of operators as above, with NULL_NOT_EQUALS added.
return op == binary_operator::NULL_EQUALS || op == binary_operator::NULL_NOT_EQUALS ||
op == binary_operator::NULL_MIN || op == binary_operator::NULL_MAX ||
op == binary_operator::NULL_LOGICAL_AND || op == binary_operator::NULL_LOGICAL_OR;
}

/**
Expand Down Expand Up @@ -109,7 +109,8 @@ bool is_comparison_binop(binary_operator op)
op == binary_operator::GREATER or // operator >
op == binary_operator::LESS_EQUAL or // operator <=
op == binary_operator::GREATER_EQUAL or // operator >=
op == binary_operator::NULL_EQUALS; // 2 null = true; 1 null = false; else ==
op == binary_operator::NULL_EQUALS or // 2 null = true; 1 null = false; else ==
op == binary_operator::NULL_NOT_EQUALS; // 2 null = false; 1 null = true; else !=
}

/**
Expand Down
26 changes: 26 additions & 0 deletions cpp/src/binaryop/compiled/NullNotEquals.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "binary_ops.cuh"

namespace cudf::binops::compiled {
// Explicit instantiation of apply_binary_op for the ops::NullNotEquals functor. The parameter
// list must match the template's declaration (presumably in binary_ops.cuh, included above --
// confirm).
template void apply_binary_op<ops::NullNotEquals>(mutable_column_view&,
column_view const&,
column_view const&,
bool is_lhs_scalar,
bool is_rhs_scalar,
rmm::cuda_stream_view);
} // namespace cudf::binops::compiled
4 changes: 3 additions & 1 deletion cpp/src/binaryop/compiled/binary_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ case binary_operator::LOG_BASE: apply_binary_op<ops::LogBase>(out, l
case binary_operator::ATAN2: apply_binary_op<ops::ATan2>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::PMOD: apply_binary_op<ops::PMod>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_EQUALS: apply_binary_op<ops::NullEquals>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_NOT_EQUALS: apply_binary_op<ops::NullNotEquals>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_MAX: apply_binary_op<ops::NullMax>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_MIN: apply_binary_op<ops::NullMin>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_LOGICAL_AND: apply_binary_op<ops::NullLogicalAnd>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
Expand Down Expand Up @@ -412,8 +413,9 @@ void apply_sorting_struct_binary_op(mutable_column_view& out,
// Struct child column type and structure mismatches are caught within the two_table_comparator
switch (op) {
case binary_operator::EQUAL: [[fallthrough]];
case binary_operator::NOT_EQUAL: [[fallthrough]];
case binary_operator::NULL_EQUALS: [[fallthrough]];
case binary_operator::NOT_EQUAL:
case binary_operator::NULL_NOT_EQUALS:
detail::apply_struct_equality_op(
out,
lhs,
Expand Down
1 change: 1 addition & 0 deletions cpp/src/binaryop/compiled/binary_ops.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ struct ops_wrapper {
type_dispatcher(rhs.type(), type_casted_accessor<TypeCommon>{}, i, rhs, is_rhs_scalar);
auto result = [&]() {
if constexpr (std::is_same_v<BinaryOperator, ops::NullEquals> or
std::is_same_v<BinaryOperator, ops::NullNotEquals> or
std::is_same_v<BinaryOperator, ops::NullLogicalAnd> or
std::is_same_v<BinaryOperator, ops::NullLogicalOr> or
std::is_same_v<BinaryOperator, ops::NullMax> or
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/binaryop/compiled/binary_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ void apply_binary_op(mutable_column_view& out,
* @brief Deploys single type or double type dispatcher that runs equality operation on each element
* of @p lhs and @p rhs columns.
*
* Comparison operators are EQUAL, NOT_EQUAL, NULL_EQUALS.
* Comparison operators are EQUAL, NOT_EQUAL, NULL_EQUALS, NULL_NOT_EQUALS.
* @p out type is boolean.
*
* This template is instantiated for each binary operator.
Expand Down
17 changes: 14 additions & 3 deletions cpp/src/binaryop/compiled/operation.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -422,15 +422,26 @@ struct NullEquals {
TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x == y)
{
output_valid = true;
if (!lhs_valid && !rhs_valid) return true;
if (lhs_valid && rhs_valid) return x == y;
return false;
return !lhs_valid && !rhs_valid;
}
// To allow std::is_invocable_v = true
template <typename TypeLhs, typename TypeRhs>
__device__ inline auto operator()(TypeLhs x, TypeRhs y) -> decltype(x == y);
};

/**
 * @brief Null-aware inequality functor, defined as the logical negation of NullEquals.
 */
struct NullNotEquals {
  // Null-aware overload: forwards every argument (including output_valid) to NullEquals
  // and returns the negation of its result.
  template <typename TypeLhs, typename TypeRhs>
  __device__ inline auto operator()(
    TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x != y)
  {
    auto const is_equal = NullEquals{}(x, y, lhs_valid, rhs_valid, output_valid);
    return !is_equal;
  }
  // Declaration only (never defined): lets std::is_invocable_v report true for (x, y)
  template <typename TypeLhs, typename TypeRhs>
  __device__ inline auto operator()(TypeLhs x, TypeRhs y) -> decltype(x != y);
};

struct NullMax {
template <typename TypeLhs,
typename TypeRhs,
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/binaryop/compiled/struct_binary_ops.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -148,7 +148,7 @@ void apply_struct_equality_op(mutable_column_view& out,
rmm::cuda_stream_view stream)
{
CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL ||
op == binary_operator::NULL_EQUALS,
op == binary_operator::NULL_EQUALS || op == binary_operator::NULL_NOT_EQUALS,
"Unsupported operator for these types",
cudf::data_type_error);

Expand Down
4 changes: 3 additions & 1 deletion cpp/src/binaryop/compiled/util.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -182,6 +182,8 @@ struct is_supported_operation_functor {
case binary_operator::LESS_EQUAL: return bool_op<ops::LessEqual, TypeLhs, TypeRhs>(out);
case binary_operator::GREATER_EQUAL: return bool_op<ops::GreaterEqual, TypeLhs, TypeRhs>(out);
case binary_operator::NULL_EQUALS: return bool_op<ops::NullEquals, TypeLhs, TypeRhs>(out);
case binary_operator::NULL_NOT_EQUALS:
return bool_op<ops::NullNotEquals, TypeLhs, TypeRhs>(out);
case binary_operator::NULL_LOGICAL_AND:
return bool_op<ops::NullLogicalAnd, TypeLhs, TypeRhs>(out);
case binary_operator::NULL_LOGICAL_OR:
Expand Down
Loading

0 comments on commit 24f3a4a

Please sign in to comment.