From 78e3548cda5e7c263d125e8c10e733ebf2c4ebbd Mon Sep 17 00:00:00 2001
From: Sevag H <sevagh@protonmail.com>
Date: Tue, 23 Aug 2022 05:02:39 -0400
Subject: [PATCH 01/16] Include thrust/sort header (#34)

* Include thrust/sort header

* Drop unused CC and CXX

CI now uses conda compilers, not CC/CXX

* Rename docs_cuml target to docs_gputreeshap
---
 GPUTreeShap/gpu_treeshap.h | 1 +
 build.sh                   | 2 +-
 ci/gpu/build.sh            | 3 ---
 cmake/doxygen.cmake        | 2 +-
 4 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/GPUTreeShap/gpu_treeshap.h b/GPUTreeShap/gpu_treeshap.h
index 6dd0265..7f4d6f9 100644
--- a/GPUTreeShap/gpu_treeshap.h
+++ b/GPUTreeShap/gpu_treeshap.h
@@ -21,6 +21,7 @@
 #include <thrust/logical.h>
 #include <thrust/reduce.h>
 #include <thrust/host_vector.h>
+#include <thrust/sort.h>
 #include <cub/cub.cuh>
 #include <algorithm>
 #include <functional>
diff --git a/build.sh b/build.sh
index 2c2641b..e5e7bf5 100755
--- a/build.sh
+++ b/build.sh
@@ -3,4 +3,4 @@ mkdir -p build
 cd build
 cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_GTEST=ON -DBUILD_EXAMPLES=ON -DBUILD_BENCHMARKS=ON
 make -j4
-cmake --build . --target docs_cuml
+cmake --build . --target docs_gputreeshap
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 048bd9a..6f7c18e 100644
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -32,9 +32,6 @@ env
 gpuci_logger "Check GPU usage"
 nvidia-smi
 
-$CC --version
-$CXX --version
-
 gpuci_logger "Activate conda env"
 . /opt/conda/etc/profile.d/conda.sh
 conda activate rapids
diff --git a/cmake/doxygen.cmake b/cmake/doxygen.cmake
index 24a4aa2..92fea80 100644
--- a/cmake/doxygen.cmake
+++ b/cmake/doxygen.cmake
@@ -22,7 +22,7 @@ function(add_doxygen_target)
     set(multiValueArgs "")
     cmake_parse_arguments(dox "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     configure_file(${dox_IN_DOXYFILE} ${dox_OUT_DOXYFILE} @ONLY)
-    add_custom_target(docs_cuml
+    add_custom_target(docs_gputreeshap
       ${DOXYGEN_EXECUTABLE} ${dox_OUT_DOXYFILE}
       WORKING_DIRECTORY ${dox_CWD}
       VERBATIM

From acb5be3c17e9adae34ac0b176da6ea8e197cb17e Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 31 Aug 2022 18:08:37 -0700
Subject: [PATCH 02/16] Fix Thrust includes. (#35)

---
 GPUTreeShap/gpu_treeshap.h          | 17 +++++++++++++++--
 benchmark/benchmark_gpu_treeshap.cu |  3 +++
 example/example.cu                  |  3 +++
 tests/test_gpu_treeshap.cu          |  5 +++++
 tests/test_utils.h                  |  3 +++
 5 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/GPUTreeShap/gpu_treeshap.h b/GPUTreeShap/gpu_treeshap.h
index 7f4d6f9..13eb6f8 100644
--- a/GPUTreeShap/gpu_treeshap.h
+++ b/GPUTreeShap/gpu_treeshap.h
@@ -15,14 +15,27 @@
  */
 
 #pragma once
+
+#include <thrust/copy.h>
 #include <thrust/device_allocator.h>
 #include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/for_each.h>
+#include <thrust/functional.h>
+#include <thrust/host_vector.h>
+#include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/discard_iterator.h>
+#include <thrust/iterator/transform_iterator.h>
 #include <thrust/logical.h>
+#include <thrust/pair.h>
 #include <thrust/reduce.h>
-#include <thrust/host_vector.h>
+#include <thrust/scan.h>
 #include <thrust/sort.h>
+#include <thrust/system/cuda/error.h>
+#include <thrust/system_error.h>
+
 #include <cub/cub.cuh>
+
 #include <algorithm>
 #include <functional>
 #include <set>
@@ -1172,7 +1185,7 @@ void ComputeBias(const PathVectorT& device_paths, DoubleVectorT* bias) {
                                                   PathIdxTransformOp());
   PathVectorT combined(sorted_paths.size());
   auto combined_out = thrust::reduce_by_key(
-      thrust::cuda ::par(alloc), path_key, path_key + sorted_paths.size(),
+      thrust::cuda::par(alloc), path_key, path_key + sorted_paths.size(),
       sorted_paths.begin(), thrust::make_discard_iterator(), combined.begin(),
       thrust::equal_to<size_t>(),
       [=] __device__(PathElement<SplitConditionT> a,
diff --git a/benchmark/benchmark_gpu_treeshap.cu b/benchmark/benchmark_gpu_treeshap.cu
index 50c35ad..d72ce6f 100644
--- a/benchmark/benchmark_gpu_treeshap.cu
+++ b/benchmark/benchmark_gpu_treeshap.cu
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 #include <GPUTreeShap/gpu_treeshap.h>
+
+#include <thrust/device_vector.h>
+
 #include <benchmark/benchmark.h>
 #include "../tests/test_utils.h"
 
diff --git a/example/example.cu b/example/example.cu
index 1ae5be9..ea0ba59 100644
--- a/example/example.cu
+++ b/example/example.cu
@@ -15,6 +15,9 @@
  */
 
 #include <GPUTreeShap/gpu_treeshap.h>
+
+#include <thrust/device_vector.h>
+
 #include <algorithm>
 #include <iostream>
 #include <limits>
diff --git a/tests/test_gpu_treeshap.cu b/tests/test_gpu_treeshap.cu
index 804a269..4586186 100644
--- a/tests/test_gpu_treeshap.cu
+++ b/tests/test_gpu_treeshap.cu
@@ -16,6 +16,11 @@
 
 #include <GPUTreeShap/gpu_treeshap.h>
 #include <cooperative_groups.h>
+
+#include <thrust/device_vector.h>
+#include <thrust/equal.h>
+#include <thrust/host_vector.h>
+
 #include <limits>
 #include <numeric>
 #include <random>
diff --git a/tests/test_utils.h b/tests/test_utils.h
index 7166f84..3aa9232 100644
--- a/tests/test_utils.h
+++ b/tests/test_utils.h
@@ -15,6 +15,9 @@
  */
 #pragma once
 #include <GPUTreeShap/gpu_treeshap.h>
+
+#include <thrust/device_vector.h>
+
 #include <limits>
 #include <numeric>
 #include <random>

From 787259b412c18ab8d5f24bf2b8bd6a59ff8208f3 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robertjmaynard@gmail.com>
Date: Sat, 5 Nov 2022 03:57:42 -0400
Subject: [PATCH 03/16] Correct lambda whose return type needs host evaluation.
 (#36)

---
 GPUTreeShap/gpu_treeshap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPUTreeShap/gpu_treeshap.h b/GPUTreeShap/gpu_treeshap.h
index 13eb6f8..4318fad 100644
--- a/GPUTreeShap/gpu_treeshap.h
+++ b/GPUTreeShap/gpu_treeshap.h
@@ -904,7 +904,7 @@ void DeduplicatePaths(PathVectorT* device_paths,
   size_t* h_num_runs_out;
   CheckCuda(cudaMallocHost(&h_num_runs_out, sizeof(size_t)));
 
-  auto combine = [] __device__(PathElement<SplitConditionT> a,
+  auto combine = [] __host__ __device__(PathElement<SplitConditionT> a,
                                PathElement<SplitConditionT> b) {
     // Combine duplicate features
     a.split_condition.Merge(b.split_condition);

From 42f05b94f780695a4826d6fd4de2585af862379e Mon Sep 17 00:00:00 2001
From: AJ Schmidt <aschmidt@nvidia.com>
Date: Thu, 27 Apr 2023 15:02:23 -0400
Subject: [PATCH 04/16] enable `copy_prs`

---
 .github/ops-bot.yaml | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 .github/ops-bot.yaml

diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml
new file mode 100644
index 0000000..84bbe71
--- /dev/null
+++ b/.github/ops-bot.yaml
@@ -0,0 +1,4 @@
+# This file controls which features from the `ops-bot` repository below are enabled.
+# - https://github.com/rapidsai/ops-bot
+
+copy_prs: true

From 87ca83b1b923d2dfaee747be2e09a8501ca3253f Mon Sep 17 00:00:00 2001
From: AJ Schmidt <ajschmidt8@users.noreply.github.com>
Date: Fri, 28 Apr 2023 09:12:48 -0400
Subject: [PATCH 05/16] Add GitHub Action PR workflow (#37)

Add GitHub Action PR workflow
---
 .github/workflows/pr.yaml              | 31 ++++++++++++++
 build.sh                               |  2 +
 ci/build_and_test.sh                   | 31 ++++++++++++++
 ci/{checks/style.sh => check_style.sh} | 12 ++----
 ci/gpu/build.sh                        | 59 --------------------------
 dependencies.yaml                      | 24 +++++++++++
 6 files changed, 91 insertions(+), 68 deletions(-)
 create mode 100644 .github/workflows/pr.yaml
 create mode 100755 ci/build_and_test.sh
 rename ci/{checks/style.sh => check_style.sh} (63%)
 mode change 100644 => 100755
 delete mode 100644 ci/gpu/build.sh
 create mode 100644 dependencies.yaml

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
new file mode 100644
index 0000000..f50fd84
--- /dev/null
+++ b/.github/workflows/pr.yaml
@@ -0,0 +1,31 @@
+name: pr
+
+on:
+  push:
+    branches:
+      - "pull-request/[0-9]+"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  pr-builder:
+    needs:
+      - checks
+      - cpp-build-test
+    secrets: inherit
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06
+  checks:
+    secrets: inherit
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06
+    with:
+      enable_check_generated_files: false
+  cpp-build-test:
+    needs: checks
+    secrets: inherit
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
+    with:
+      build_type: pull-request
+      test_script: ci/build_and_test.sh
+      matrix_filter: map(select(.ARCH == "amd64"))
diff --git a/build.sh b/build.sh
index e5e7bf5..b122c43 100755
--- a/build.sh
+++ b/build.sh
@@ -1,4 +1,6 @@
 #!/bin/bash
+set -euo pipefail
+
 mkdir -p build
 cd build
 cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_GTEST=ON -DBUILD_EXAMPLES=ON -DBUILD_BENCHMARKS=ON
diff --git a/ci/build_and_test.sh b/ci/build_and_test.sh
new file mode 100755
index 0000000..d9079b4
--- /dev/null
+++ b/ci/build_and_test.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+set -euo pipefail
+. /opt/conda/etc/profile.d/conda.sh
+
+rapids-logger "Generate C++ build & test dependencies"
+rapids-dependency-file-generator \
+  --output conda \
+  --file_key build \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
+
+rapids-mamba-retry env create --force -f env.yaml -n test
+
+# Temporarily allow unbound variables for conda activation.
+set +u
+conda activate test
+set -u
+
+rapids-print-env
+
+rapids-logger "Check GPU usage"
+nvidia-smi
+
+rapids-logger "Build C++ targets"
+./build.sh
+
+rapids-logger "GoogleTest"
+./build/TestGPUTreeShap
+
+rapids-logger "Run Example"
+./build/GPUTreeShapExample
diff --git a/ci/checks/style.sh b/ci/check_style.sh
old mode 100644
new mode 100755
similarity index 63%
rename from ci/checks/style.sh
rename to ci/check_style.sh
index ed0ca78..788f37e
--- a/ci/checks/style.sh
+++ b/ci/check_style.sh
@@ -1,21 +1,15 @@
 #!/bin/bash
-# Copyright (c) 2020, NVIDIA CORPORATION.
-#####################
-# GPUTreeShap Style Tester #
-#####################
+# Copyright (c) 2023, NVIDIA CORPORATION.
 
-# Ignore errors and set path
+# Ignore errors
 set +e
-PATH=/opt/conda/bin:$PATH
 RETVAL="0"
 
-# Activate common conda env
 . /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
 
 # Check for a consistent code format
 pip install cpplint
-FORMAT=`cpplint --recursive GPUTreeShap tests example benchmark 2>&1`
+FORMAT=$(cpplint --recursive GPUTreeShap tests example benchmark 2>&1)
 FORMAT_RETVAL=$?
 if [ "$RETVAL" = "0" ]; then
   RETVAL=$FORMAT_RETVAL
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
deleted file mode 100644
index 6f7c18e..0000000
--- a/ci/gpu/build.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
-#########################################
-# GPUTreeShap GPU build and test script for CI #
-#########################################
-
-set -e
-NUMARGS=$#
-ARGS=$*
-
-# Set path and build parallel level
-export PATH=/usr/local/cuda/bin:$PATH
-export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
-export CUDA_REL=${CUDA_VERSION%.*}
-
-# Set home to the job's workspace
-export HOME=$WORKSPACE
-
-# Install gpuCI tools
-curl -s https://raw.githubusercontent.com/rapidsai/gpuci-tools/main/install.sh | bash
-source ~/.bashrc
-cd ~
-
-################################################################################
-# SETUP - Check environment
-################################################################################
-
-gpuci_logger "Check environment"
-env
-
-
-gpuci_logger "Check GPU usage"
-nvidia-smi
-
-gpuci_logger "Activate conda env"
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-################################################################################
-# BUILD - Build tests
-################################################################################
-
-gpuci_logger "Build C++ targets"
-./build.sh
-
-################################################################################
-# TEST - Run GoogleTest
-################################################################################
-
-gpuci_logger "GoogleTest"
-cd $WORKSPACE/build
-./TestGPUTreeShap
-
-################################################################################
-# Run example
-################################################################################
-gpuci_logger "Example"
-cd $WORKSPACE/build
-./GPUTreeShapExample
diff --git a/dependencies.yaml b/dependencies.yaml
new file mode 100644
index 0000000..23b7b95
--- /dev/null
+++ b/dependencies.yaml
@@ -0,0 +1,24 @@
+# Dependency list for https://github.com/rapidsai/dependency-file-generator
+files:
+  build:
+    output: none
+    includes:
+      - build
+      - docs
+channels:
+  - rapidsai
+  - conda-forge
+dependencies:
+  build:
+    common:
+      - output_types: [conda]
+        packages:
+          - binutils
+          - cmake
+          - gxx <=11
+          - make
+  docs:
+    common:
+      - output_types: [conda]
+        packages:
+          - doxygen

From 2337231b25a0bc965eb0bb2e6af15081b8816994 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 21 Jul 2023 04:43:45 -0500
Subject: [PATCH 06/16] Use rapids-cmake for gtest and benchmark dependencies.
 (#38)

---
 CMakeLists.txt     | 65 ++++++++++++++++++++++++++++++----------------
 fetch_rapids.cmake | 19 ++++++++++++++
 2 files changed, 62 insertions(+), 22 deletions(-)
 create mode 100644 fetch_rapids.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d70b8e0..b8f245e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,8 +1,33 @@
-cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
-include(FetchContent)
-project(GPUTreeShap LANGUAGES CXX CUDA)
-set(CMAKE_CXX_STANDARD 14)
-set(CMAKE_CUDA_STANDARD 14)
+#=============================================================================
+# Copyright (c) 2020-2023 NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#=============================================================================
+
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
+
+include(fetch_rapids.cmake)
+
+include(rapids-cmake)
+include(rapids-cpm)
+include(rapids-cuda)
+include(rapids-export)
+include(rapids-find)
+
+rapids_cuda_init_architectures(CUML)
+
+project(GPUTreeShap VERSION 23.08.00 LANGUAGES CXX CUDA)
+
 option(BUILD_GTEST "Build google tests" OFF)
 option(BUILD_EXAMPLES "Build examples" OFF)
 option(BUILD_BENCHMARKS "Build benchmarks" OFF)
@@ -12,23 +37,24 @@ if (NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
 endif()
 
+# add third party dependencies using CPM
+rapids_cpm_init()
+
 # GPUTreeSHAP target is header-only
 add_library(GPUTreeShap INTERFACE)
 add_library(GPUTreeShap::GPUTreeShap ALIAS GPUTreeShap)
 target_sources(GPUTreeShap INTERFACE ${GPUTreeShap_SOURCE_DIR}/GPUTreeShap/gpu_treeshap.h)
 target_include_directories(GPUTreeShap INTERFACE ${GPUTreeShap_SOURCE_DIR})
 
+set_property(TARGET GPUTreeShap PROPERTY CXX_STANDARD 17)
+set_property(TARGET GPUTreeShap PROPERTY CUDA_STANDARD 17)
 set(COMPILE_OPTIONS --expt-extended-lambda -lineinfo --Werror all-warnings)
 set(GCC_COMPILE_OPTIONS -Xcompiler -Werror,-Wall,-Wextra)
+
 if(BUILD_GTEST)
-  set(gtest_force_shared_crt ON CACHE BOOL "Always use msvcrt.dll")
-  FetchContent_Declare(
-    googletest
-    GIT_REPOSITORY https://github.com/google/googletest.git
-    GIT_TAG        release-1.8.1
-    GIT_SHALLOW       true
-  )
-  FetchContent_MakeAvailable(googletest)
+  include(${rapids-cmake-dir}/cpm/gtest.cmake)
+  rapids_cpm_gtest()
+
   add_executable(TestGPUTreeShap tests/test_gpu_treeshap.cu)
   target_link_libraries(TestGPUTreeShap PRIVATE GPUTreeShap)
   target_compile_options(TestGPUTreeShap PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: ${COMPILE_OPTIONS}>)
@@ -49,14 +75,9 @@ if(BUILD_EXAMPLES)
 endif()
 
 if(BUILD_BENCHMARKS)
-  FetchContent_Declare(
-    benchmark
-    GIT_REPOSITORY https://github.com/google/benchmark.git
-    GIT_TAG        v1.5.2
-    GIT_SHALLOW       true
-  )
-  set(BENCHMARK_ENABLE_TESTING OFF CACHE INTERNAL "Google benchmark tests off")
-  FetchContent_MakeAvailable(benchmark)
+  include(${rapids-cmake-dir}/cpm/gbench.cmake)
+  rapids_cpm_gbench()
+
   add_executable(BenchmarkGPUTreeShap benchmark/benchmark_gpu_treeshap.cu)
   target_link_libraries(BenchmarkGPUTreeShap PRIVATE GPUTreeShap)
   target_compile_options(BenchmarkGPUTreeShap PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: ${COMPILE_OPTIONS}>)
@@ -69,4 +90,4 @@ endif()
 include(cmake/doxygen.cmake)
 add_doxygen_target(IN_DOXYFILE Doxyfile.in
   OUT_DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
-  CWD ${CMAKE_CURRENT_BINARY_DIR})
\ No newline at end of file
+  CWD ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake
new file mode 100644
index 0000000..dab6e5a
--- /dev/null
+++ b/fetch_rapids.cmake
@@ -0,0 +1,19 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/GPUTREESHAP_RAPIDS.cmake)
+  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/RAPIDS.cmake
+       ${CMAKE_CURRENT_BINARY_DIR}/GPUTREESHAP_RAPIDS.cmake
+  )
+endif()
+include(${CMAKE_CURRENT_BINARY_DIR}/GPUTREESHAP_RAPIDS.cmake)

From ae946908b4cdc2bf498deefc426a3656761166f5 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 21 Jul 2023 05:31:07 -0500
Subject: [PATCH 07/16] Update workflows, codeowners, update-version.sh (#39)

---
 .github/CODEOWNERS           |  7 ++++++
 .github/workflows/pr.yaml    |  6 ++---
 CMakeLists.txt               |  9 ++++++--
 Doxyfile.in                  |  2 +-
 ci/release/update-version.sh | 45 ++++++++++++++++++++++++++++++++++++
 5 files changed, 63 insertions(+), 6 deletions(-)
 create mode 100644 .github/CODEOWNERS
 create mode 100755 ci/release/update-version.sh

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 0000000..409d2cd
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,7 @@
+#gputreeshap code owners
+*                  @rapidsai/gputreeshap-write
+
+#build/ops code owners
+.github/           @rapidsai/ops-codeowners
+ci/                @rapidsai/ops-codeowners
+dependencies.yaml  @rapidsai/ops-codeowners
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index f50fd84..3e87ca0 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -15,16 +15,16 @@ jobs:
       - checks
       - cpp-build-test
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.08
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.08
     with:
       enable_check_generated_files: false
   cpp-build-test:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
     with:
       build_type: pull-request
       test_script: ci/build_and_test.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b8f245e..8f9d3c6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,13 @@
 
 cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
+
+include(FetchContent)
+
+project(GPUTreeShap VERSION 23.08.00 LANGUAGES CXX CUDA)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CUDA_STANDARD 17)
+
 include(fetch_rapids.cmake)
 
 include(rapids-cmake)
@@ -26,8 +33,6 @@ include(rapids-find)
 
 rapids_cuda_init_architectures(CUML)
 
-project(GPUTreeShap VERSION 23.08.00 LANGUAGES CXX CUDA)
-
 option(BUILD_GTEST "Build google tests" OFF)
 option(BUILD_EXAMPLES "Build examples" OFF)
 option(BUILD_BENCHMARKS "Build benchmarks" OFF)
diff --git a/Doxyfile.in b/Doxyfile.in
index f0b53ae..09c72a1 100644
--- a/Doxyfile.in
+++ b/Doxyfile.in
@@ -38,7 +38,7 @@ PROJECT_NAME           = "GPUTreeShap"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         =
+PROJECT_NUMBER         = "23.08"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
new file mode 100755
index 0000000..69390f7
--- /dev/null
+++ b/ci/release/update-version.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Copyright (c) 2023 NVIDIA CORPORATION.
+###############################
+# gputreeshap Version Updater #
+###############################
+
+## Usage
+# bash update-version.sh <new_version>
+
+
+# Format is YY.MM.PP - no leading 'v' or trailing 'a'
+NEXT_FULL_TAG=$1
+
+# Get current version
+CURRENT_TAG=$(git tag --merged HEAD | grep -xE '^v.*' | sort --version-sort | tail -n 1 | tr -d 'v')
+CURRENT_MAJOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}')
+CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}')
+CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}')
+CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR}
+
+#Get <major>.<minor> for next version
+NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
+NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
+NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
+
+echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"
+
+# In-place sed ($1 = expression, $2 = file); "-i.bak" works on both Linux and Mac sed, and the backup is deleted once sed succeeds
+function sed_runner() {
+    sed -i.bak "$1" "$2" && rm -f "${2}.bak"
+}
+
+# CMakeLists
+sed_runner 's/'"GPUTREESHAP VERSION .* LANGUAGES"'/'"GPUTREESHAP VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' CMakeLists.txt
+
+# rapids-cmake version
+sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake
+
+# Update project_number (RAPIDS_VERSION) in the CPP doxygen file
+sed_runner "s/\(PROJECT_NUMBER.*=\).*/\1 \"${NEXT_SHORT_TAG}\"/g" Doxyfile.in
+
+# CI files
+for FILE in .github/workflows/*.yaml; do
+  sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
+done

From 854070d9fcc2e99ab725175abbcb58c4560b8dce Mon Sep 17 00:00:00 2001
From: AJ Schmidt <ajschmidt8@users.noreply.github.com>
Date: Mon, 28 Aug 2023 09:35:08 -0400
Subject: [PATCH 08/16] Use `copy-pr-bot` (#40)

This PR replaces the `copy_prs` functionality from the `ops-bot` with the new dedicated `copy-pr-bot` GitHub application.

Thorough documentation for the new `copy-pr-bot` application can be viewed below.

- https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/

**Important**: `copy-pr-bot` enforces signed commits. If an organization member opens a PR that contains unsigned commits, it will be deemed untrusted and therefore require an `/ok to test` comment. See the GitHub docs [here](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification) for information on how to set up commit signing.

Any time a PR is deemed untrusted, it will receive a comment that looks like this: https://github.com/rapidsai/ci-imgs/pull/63#issuecomment-1688973208.

Every subsequent commit on an untrusted PR will require an additional `/ok to test` comment.

Any existing PRs that have unsigned commits after this change is merged will require an `/ok to test` comment for each subsequent commit _or_ the PR can be rebased to include signed commits as mentioned in the docs below:
https://docs.gha-runners.nvidia.com/cpr/contributors.

This information is all included on the documentation page linked above.

_I've skipped CI on this PR since it's not a change that is tested._

[skip ci]
---
 .github/copy-pr-bot.yaml | 4 ++++
 .github/ops-bot.yaml     | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)
 create mode 100644 .github/copy-pr-bot.yaml
 delete mode 100644 .github/ops-bot.yaml

diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
new file mode 100644
index 0000000..895ba83
--- /dev/null
+++ b/.github/copy-pr-bot.yaml
@@ -0,0 +1,4 @@
+# Configuration file for `copy-pr-bot` GitHub App
+# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
+
+enabled: true
diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml
deleted file mode 100644
index 84bbe71..0000000
--- a/.github/ops-bot.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-# This file controls which features from the `ops-bot` repository below are enabled.
-# - https://github.com/rapidsai/ops-bot
-
-copy_prs: true

From 0b4429f8e2771d941c9f9f64158c80bc1528f8b7 Mon Sep 17 00:00:00 2001
From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com>
Date: Wed, 18 Oct 2023 08:36:19 -0500
Subject: [PATCH 09/16] update workflow links (#41)

---
 .github/workflows/pr.yaml    | 6 +++---
 ci/release/update-version.sh | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 3e87ca0..ff45010 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -15,16 +15,16 @@ jobs:
       - checks
       - cpp-build-test
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.08
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.08
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.08
+    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-23.08
     with:
       enable_check_generated_files: false
   cpp-build-test:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
     with:
       build_type: pull-request
       test_script: ci/build_and_test.sh
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 69390f7..0172066 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -41,5 +41,5 @@ sed_runner "s/\(PROJECT_NUMBER.*=\).*/\1 \"${NEXT_SHORT_TAG}\"/g" Doxyfile.in
 
 # CI files
 for FILE in .github/workflows/*.yaml; do
-  sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
+  sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
 done

From ba0d5c80f8919416ee5e15eb39b1324c7066714c Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 12 Dec 2023 13:34:00 -0500
Subject: [PATCH 10/16] Correct #20054-D errors found when compiling on ARM

The nvcc compiler on ARM is more stringent and detects
incorrect shared memory usage in kernels. The static
shared memory approach only works with types that have
a default constructor.

So we update `PathElement` to properly follow default construction
rules.
---
 GPUTreeShap/gpu_treeshap.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/GPUTreeShap/gpu_treeshap.h b/GPUTreeShap/gpu_treeshap.h
index 4318fad..b40f2c9 100644
--- a/GPUTreeShap/gpu_treeshap.h
+++ b/GPUTreeShap/gpu_treeshap.h
@@ -114,16 +114,16 @@ struct PathElement {
   }
 
   /*! Unique path index. */
-  size_t path_idx;
+  size_t path_idx{};
   /*! Feature of this split, -1 indicates bias term. */
-  int64_t feature_idx;
+  int64_t feature_idx{};
   /*! Indicates class for multiclass problems. */
-  int group;
-  SplitConditionT split_condition;
+  int group{};
+  SplitConditionT split_condition{};
   /*! Probability of following this path when feature_idx is not in the active
    * set. */
-  double zero_fraction;
-  float v;  // Leaf weight at the end of the path
+  double zero_fraction{};
+  float v{};  // Leaf weight at the end of the path
 };
 
 // Helper function that accepts an index into a flat contiguous array and the

From fda0aabb11959e846a559ab6ee3e0f2d2e3d35f0 Mon Sep 17 00:00:00 2001
From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com>
Date: Mon, 18 Mar 2024 13:58:27 -0500
Subject: [PATCH 11/16] Update script input name (#44)

* updating script input

* update workflow branch target
---
 .github/workflows/pr.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index ff45010..4f044a4 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -15,17 +15,17 @@ jobs:
       - checks
       - cpp-build-test
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.08
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04
   checks:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-23.08
+    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04
     with:
       enable_check_generated_files: false
   cpp-build-test:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04
     with:
       build_type: pull-request
-      test_script: ci/build_and_test.sh
+      script: ci/build_and_test.sh
       matrix_filter: map(select(.ARCH == "amd64"))

From fc777572f5c1850ecfbfe05f4a268f8600f9f231 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 2 Apr 2024 18:36:37 -0500
Subject: [PATCH 12/16] Add dependency on CUDA. (#46)

* Add dependency on CUDA.

* Pin same compilers as rest of RAPIDS.

* Remove extra else().

* Use other compiler packages.

* Remove sysroot.

* Several changes to make CI pass.

* Update to 24.04. Closes #47.

* Fix lint.

* Update style checks to use pre-commit.

* Add checks to dependencies.yaml.

* Cleanup.

* Add checks file key.
---
 .gitignore                   |  2 +-
 .pre-commit-config.yaml      | 21 ++++++++++++++++
 CMakeLists.txt               |  7 +++---
 Doxyfile.in                  |  2 +-
 GPUTreeShap/gpu_treeshap.h   | 18 +++++++-------
 README.md                    |  2 +-
 ci/check_style.sh            | 33 ++++++++++---------------
 ci/release/update-version.sh |  4 +--
 dependencies.yaml            | 47 +++++++++++++++++++++++++++++++++---
 fetch_rapids.cmake           |  4 +--
 tests/test_gpu_treeshap.cu   |  5 ++--
 11 files changed, 100 insertions(+), 45 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.gitignore b/.gitignore
index 2dbf1a6..47702aa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,4 @@ build/
 *.pdf
 *.csv
 *.png
-*.pyc
\ No newline at end of file
+*.pyc
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..e9ccfab
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,21 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+  - repo: https://github.com/cpplint/cpplint
+    rev: 1.6.1
+    hooks:
+      - id: cpplint
+        args: ["--recursive"]
+  - repo: https://github.com/rapidsai/pre-commit-hooks
+    rev: v0.0.3
+    hooks:
+      - id: verify-copyright
+        args: ["--fix", "--main-branch", "main"]
+
+default_language_version:
+      python: python3
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8f9d3c6..34ac201 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2020-2023 NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@ cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
 include(FetchContent)
 
-project(GPUTreeShap VERSION 23.08.00 LANGUAGES CXX CUDA)
+project(GPUTreeShap VERSION 24.04.00 LANGUAGES CXX CUDA)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CUDA_STANDARD 17)
 
@@ -65,9 +65,8 @@ if(BUILD_GTEST)
   target_compile_options(TestGPUTreeShap PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: ${COMPILE_OPTIONS}>)
   if(NOT MSVC)
     target_compile_options(TestGPUTreeShap PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${GCC_COMPILE_OPTIONS}>)
-  else()
   endif()
-  target_link_libraries(TestGPUTreeShap  PRIVATE gtest gtest_main)
+  target_link_libraries(TestGPUTreeShap PRIVATE GTest::gtest GTest::gtest_main)
 endif()
 
 if(BUILD_EXAMPLES)
diff --git a/Doxyfile.in b/Doxyfile.in
index 09c72a1..f3f1c6f 100644
--- a/Doxyfile.in
+++ b/Doxyfile.in
@@ -38,7 +38,7 @@ PROJECT_NAME           = "GPUTreeShap"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "23.08"
+PROJECT_NUMBER         = "24.04"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/GPUTreeShap/gpu_treeshap.h b/GPUTreeShap/gpu_treeshap.h
index b40f2c9..bc213d3 100644
--- a/GPUTreeShap/gpu_treeshap.h
+++ b/GPUTreeShap/gpu_treeshap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -1234,15 +1234,15 @@ void ComputeBias(const PathVectorT& device_paths, DoubleVectorT* bias) {
  * ensemble size.
  *
  * \exception std::invalid_argument Thrown when an invalid argument error
- * condition occurs. 
+ * condition occurs.
  * \tparam  PathIteratorT     Thrust type iterator, may be
  * thrust::device_ptr for device memory, or stl iterator/raw pointer for host
- * memory. 
+ * memory.
  * \tparam  PhiIteratorT      Thrust type iterator, may be
  * thrust::device_ptr for device memory, or stl iterator/raw pointer for host
- * memory. Value type must be floating point. 
+ * memory. Value type must be floating point.
  * \tparam  DatasetT User-specified
- * dataset container. 
+ * dataset container.
  * \tparam  DeviceAllocatorT  Optional thrust style
  * allocator.
  *
@@ -1256,13 +1256,13 @@ void ComputeBias(const PathVectorT& device_paths, DoubleVectorT* bias) {
  * root with feature_idx = -1 and zero_fraction = 1.0. The ordering of path
  * elements inside a unique path does not matter - the result will be the same.
  * Paths may contain duplicate features. See the PathElement class for more
- * information. 
- * \param end         Path end iterator. 
+ * information.
+ * \param end         Path end iterator.
  * \param num_groups  Number
  * of output groups. In multiclass classification the algorithm outputs feature
- * contributions per output class. 
+ * contributions per output class.
  * \param phis_begin  Begin iterator for output
- * phis. 
+ * phis.
  * \param phis_end    End iterator for output phis.
  */
 template <typename DeviceAllocatorT = thrust::device_allocator<int>,
diff --git a/README.md b/README.md
index 613090d..75e2aad 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ GPUTreeShap is a cuda implementation of the TreeShap algorithm by Lundberg et al
 See the associated publication [here](https://arxiv.org/abs/2010.13972)
 ```
 @misc{mitchell2022gputreeshap,
-      title={GPUTreeShap: Massively Parallel Exact Calculation of SHAP Scores for Tree Ensembles}, 
+      title={GPUTreeShap: Massively Parallel Exact Calculation of SHAP Scores for Tree Ensembles},
       author={Rory Mitchell and Eibe Frank and Geoffrey Holmes},
       year={2022},
       eprint={2010.13972},
diff --git a/ci/check_style.sh b/ci/check_style.sh
index 788f37e..3e5f4ff 100755
--- a/ci/check_style.sh
+++ b/ci/check_style.sh
@@ -1,27 +1,20 @@
 #!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
-# Ignore errors
-set +e
-RETVAL="0"
+set -euo pipefail
 
+rapids-logger "Create checks conda environment"
 . /opt/conda/etc/profile.d/conda.sh
 
-# Check for a consistent code format
-pip install cpplint
-FORMAT=$(cpplint --recursive GPUTreeShap tests example benchmark 2>&1)
-FORMAT_RETVAL=$?
-if [ "$RETVAL" = "0" ]; then
-  RETVAL=$FORMAT_RETVAL
-fi
+ENV_YAML_DIR="$(mktemp -d)"
 
-# Output results if failure otherwise show pass
-if [ "$FORMAT_RETVAL" != "0" ]; then
-  echo -e "\n\n>>>> FAILED: cpplint format check; begin output\n\n"
-  echo -e "$FORMAT"
-  echo -e "\n\n>>>> FAILED: cpplint format check; end output\n\n"
-else
-  echo -e "\n\n>>>> PASSED: cpplint format check\n\n"
-fi
+rapids-dependency-file-generator \
+  --output conda \
+  --file_key checks \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"
 
-exit $RETVAL
+rapids-mamba-retry env create --force -f "${ENV_YAML_DIR}/env.yaml" -n checks
+conda activate checks
+
+# Run pre-commit checks
+pre-commit run --all-files --show-diff-on-failure
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 0172066..c411943 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023 NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 ###############################
 # gputreeshap Version Updater #
 ###############################
@@ -31,7 +31,7 @@ function sed_runner() {
 }
 
 # CMakeLists
-sed_runner 's/'"GPUTREESHAP VERSION .* LANGUAGES"'/'"GPUTREESHAP VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' CMakeLists.txt
+sed_runner 's/'"GPUTreeShap VERSION .* LANGUAGES"'/'"GPUTreeShap VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' CMakeLists.txt
 
 # rapids-cmake version
 sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake
diff --git a/dependencies.yaml b/dependencies.yaml
index 23b7b95..333b122 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -4,21 +4,62 @@ files:
     output: none
     includes:
       - build
+      - cuda_version
+      - cuda
       - docs
+  checks:
+    output: none
+    includes:
+      - checks
 channels:
   - rapidsai
   - conda-forge
 dependencies:
   build:
     common:
-      - output_types: [conda]
+      - output_types: conda
         packages:
           - binutils
           - cmake
-          - gxx <=11
+          - gcc=11.*
+          - gxx=11.*
           - make
+  checks:
+    common:
+      - output_types: [conda, requirements]
+        packages:
+          - pre-commit
+  cuda_version:
+    specific:
+      - output_types: conda
+        matrices:
+          - matrix:
+              cuda: "11.8"
+            packages:
+              - cuda-version=11.8
+          - matrix:
+              cuda: "12.0"
+            packages:
+              - cuda-version=12.0
+          - matrix:
+              cuda: "12.2"
+            packages:
+              - cuda-version=12.2
+  cuda:
+    specific:
+      - output_types: conda
+        matrices:
+          - matrix:
+              cuda: "12.*"
+            packages:
+              - cuda-nvcc
+              - cuda-cudart-dev
+          - matrix:
+              cuda: "11.*"
+            # GPUTreeShap CI uses the container's CUDA 11.8.
+            packages:
   docs:
     common:
-      - output_types: [conda]
+      - output_types: conda
         packages:
           - doxygen
diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake
index dab6e5a..f494812 100644
--- a/fetch_rapids.cmake
+++ b/fetch_rapids.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/GPUTREESHAP_RAPIDS.cmake)
-  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/RAPIDS.cmake
+  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/RAPIDS.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/GPUTREESHAP_RAPIDS.cmake
   )
 endif()
diff --git a/tests/test_gpu_treeshap.cu b/tests/test_gpu_treeshap.cu
index 4586186..be8d09c 100644
--- a/tests/test_gpu_treeshap.cu
+++ b/tests/test_gpu_treeshap.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <gtest/gtest.h>
+
 #include <GPUTreeShap/gpu_treeshap.h>
 #include <cooperative_groups.h>
 
@@ -25,7 +27,6 @@
 #include <numeric>
 #include <random>
 #include <vector>
-#include "gtest/gtest.h"
 #include "tests/test_utils.h"
 #include "../GPUTreeShap/gpu_treeshap.h"
 

From d6413a12bdf662b9e36191433fe5fb7ea240e4a1 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 2 Apr 2024 18:53:34 -0500
Subject: [PATCH 13/16] Use `conda env create --yes` instead of `--force` (#45)

* Use `conda env create --yes` instead of `--force`.

* Update copyright.
---
 ci/build_and_test.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/build_and_test.sh b/ci/build_and_test.sh
index d9079b4..2a02816 100755
--- a/ci/build_and_test.sh
+++ b/ci/build_and_test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 set -euo pipefail
 . /opt/conda/etc/profile.d/conda.sh
 
@@ -9,7 +9,7 @@ rapids-dependency-file-generator \
   --file_key build \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
 
-rapids-mamba-retry env create --force -f env.yaml -n test
+rapids-mamba-retry env create --yes -f env.yaml -n test
 
 # Temporarily allow unbound variables for conda activation.
 set +u

From b0705092987adfa149ebeacf5ee965fee36c875f Mon Sep 17 00:00:00 2001
From: Ray Douglass <3107146+raydouglass@users.noreply.github.com>
Date: Fri, 7 Jun 2024 17:17:51 -0400
Subject: [PATCH 14/16] Adopt CI/packaging codeowners (#48)

* Adopt CI/packaging codeowners

* Update .github/CODEOWNERS

Co-authored-by: Bradley Dice <bdice@bradleydice.com>

---------

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 .github/CODEOWNERS | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 409d2cd..eca0648 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,7 +1,14 @@
 #gputreeshap code owners
 *                  @rapidsai/gputreeshap-write
 
-#build/ops code owners
-.github/           @rapidsai/ops-codeowners
-ci/                @rapidsai/ops-codeowners
-dependencies.yaml  @rapidsai/ops-codeowners
+#CI code owners
+/.github/                @rapidsai/ci-codeowners
+/ci/                     @rapidsai/ci-codeowners
+/.pre-commit-config.yaml @rapidsai/ci-codeowners
+
+#packaging code owners
+/.devcontainer/    @rapidsai/packaging-codeowners
+/conda/            @rapidsai/packaging-codeowners
+/dependencies.yaml @rapidsai/packaging-codeowners
+/build.sh          @rapidsai/packaging-codeowners
+pyproject.toml     @rapidsai/packaging-codeowners

From 55441c4c07d19fd874a4804aeccf6d149694757d Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 17 Jun 2024 10:40:42 -0500
Subject: [PATCH 15/16] resolve file_key deprecation warning from
 rapids-dependency-file-generator (#49)

---
 .pre-commit-config.yaml | 5 +++++
 ci/build_and_test.sh    | 2 +-
 ci/check_style.sh       | 4 ++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e9ccfab..a9533ad 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,6 +16,11 @@ repos:
     hooks:
       - id: verify-copyright
         args: ["--fix", "--main-branch", "main"]
+  - repo: https://github.com/rapidsai/dependency-file-generator
+    rev: v1.13.11
+    hooks:
+      - id: rapids-dependency-file-generator
+        args: ["--clean"]
 
 default_language_version:
       python: python3
diff --git a/ci/build_and_test.sh b/ci/build_and_test.sh
index 2a02816..428ce93 100755
--- a/ci/build_and_test.sh
+++ b/ci/build_and_test.sh
@@ -6,7 +6,7 @@ set -euo pipefail
 rapids-logger "Generate C++ build & test dependencies"
 rapids-dependency-file-generator \
   --output conda \
-  --file_key build \
+  --file-key build \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
 
 rapids-mamba-retry env create --yes -f env.yaml -n test
diff --git a/ci/check_style.sh b/ci/check_style.sh
index 3e5f4ff..5e182be 100755
--- a/ci/check_style.sh
+++ b/ci/check_style.sh
@@ -10,10 +10,10 @@ ENV_YAML_DIR="$(mktemp -d)"
 
 rapids-dependency-file-generator \
   --output conda \
-  --file_key checks \
+  --file-key checks \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"
 
-rapids-mamba-retry env create --force -f "${ENV_YAML_DIR}/env.yaml" -n checks
+rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n checks
 conda activate checks
 
 # Run pre-commit checks

From 40eae8c4c45974705f8053e4d3d05b88e3cfaefd Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Tue, 20 Aug 2024 02:26:45 +0800
Subject: [PATCH 16/16] Avoid the use of shared variables. (#50)

- Use registers instead.
- Fix Python benchmark script with latest pandas and XGBoost.
- Update the CI.

---------

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 .github/workflows/pr.yaml  |   6 +-
 CMakeLists.txt             |   2 +-
 Doxyfile.in                |   2 +-
 GPUTreeShap/gpu_treeshap.h |   8 +--
 benchmark/benchmark.py     | 109 ++++++++++++++++++++++++-------------
 dependencies.yaml          |   4 ++
 fetch_rapids.cmake         |   2 +-
 7 files changed, 83 insertions(+), 50 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 4f044a4..38a092e 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -15,16 +15,16 @@ jobs:
       - checks
       - cpp-build-test
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
   checks:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10
     with:
       enable_check_generated_files: false
   cpp-build-test:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10
     with:
       build_type: pull-request
       script: ci/build_and_test.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 34ac201..ef2a7cd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,7 +19,7 @@ cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
 include(FetchContent)
 
-project(GPUTreeShap VERSION 24.04.00 LANGUAGES CXX CUDA)
+project(GPUTreeShap VERSION 24.10.00 LANGUAGES CXX CUDA)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CUDA_STANDARD 17)
 
diff --git a/Doxyfile.in b/Doxyfile.in
index f3f1c6f..49a869f 100644
--- a/Doxyfile.in
+++ b/Doxyfile.in
@@ -38,7 +38,7 @@ PROJECT_NAME           = "GPUTreeShap"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "24.04"
+PROJECT_NUMBER         = "24.10"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/GPUTreeShap/gpu_treeshap.h b/GPUTreeShap/gpu_treeshap.h
index bc213d3..c9d5d6c 100644
--- a/GPUTreeShap/gpu_treeshap.h
+++ b/GPUTreeShap/gpu_treeshap.h
@@ -459,15 +459,13 @@ __global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK)
                const PathElement<SplitConditionT>* path_elements,
                const size_t* bin_segments, size_t num_groups, double* phis) {
   // Use shared memory for structs, otherwise nvcc puts in local memory
-  __shared__ DatasetT s_X;
-  s_X = X;
   __shared__ PathElement<SplitConditionT> s_elements[kBlockSize];
   PathElement<SplitConditionT>& e = s_elements[threadIdx.x];
 
   size_t start_row, end_row;
   bool thread_active;
   ConfigureThread<DatasetT, kBlockSize, kRowsPerWarp>(
-      s_X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, &e,
+      X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, &e,
       &thread_active);
   uint32_t mask = __ballot_sync(FULL_MASK, thread_active);
   if (!thread_active) return;
@@ -564,15 +562,13 @@ __global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK)
                            const size_t* bin_segments, size_t num_groups,
                            double* phis_interactions) {
   // Use shared memory for structs, otherwise nvcc puts in local memory
-  __shared__ DatasetT s_X;
-  s_X = X;
   __shared__ PathElement<SplitConditionT> s_elements[kBlockSize];
   PathElement<SplitConditionT>* e = &s_elements[threadIdx.x];
 
   size_t start_row, end_row;
   bool thread_active;
   ConfigureThread<DatasetT, kBlockSize, kRowsPerWarp>(
-      s_X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, e,
+      X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, e,
       &thread_active);
   uint32_t mask = __ballot_sync(FULL_MASK, thread_active);
   if (!thread_active) return;
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 70caecd..dc916b8 100644
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -1,42 +1,62 @@
-import xgboost as xgb
-import numpy as np
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import annotations
+
+import argparse
 import time
-from sklearn import datasets
-from joblib import Memory
+
+import numpy as np
 import pandas as pd
-import argparse
+import xgboost as xgb
+from joblib import Memory
+from sklearn import datasets
 
 memory = Memory('./cachedir', verbose=0)
 
 
 # Contains a dataset in numpy format as well as the relevant objective and metric
 class TestDataset:
-    def __init__(self, name, Xy, objective
-                 ):
+    def __init__(self, name, Xy, objective):
         self.name = name
         self.objective = objective
         self.X, self.y = Xy
 
     def set_params(self, params_in):
-        params_in['objective'] = self.objective
+        params_in["objective"] = self.objective
         if self.objective == "multi:softmax":
             params_in["num_class"] = int(np.max(self.y) + 1)
         return params_in
 
     def get_dmat(self):
-        return xgb.DMatrix(self.X, self.y)
+        return xgb.QuantileDMatrix(self.X, self.y, enable_categorical=True)
 
     def get_test_dmat(self, num_rows):
         rs = np.random.RandomState(432)
-        return xgb.DMatrix(self.X[rs.randint(0, self.X.shape[0], size=num_rows), :])
+        if hasattr(self.X, "iloc"):
+            x = self.X.iloc[rs.randint(0, self.X.shape[0], size=num_rows), :]
+        else:
+            x = self.X[rs.randint(0, self.X.shape[0], size=num_rows), :]
+        return xgb.DMatrix(x, enable_categorical=True)
 
 
 @memory.cache
-def train_model(dataset, max_depth, num_rounds):
+def train_model(dataset: TestDataset, max_depth: int, num_rounds: int) -> xgb.Booster:
     dmat = dataset.get_dmat()
-    params = {'tree_method': 'gpu_hist', 'max_depth': max_depth, 'eta': 0.01}
+    params = {'tree_method': 'hist', "device": "gpu", 'max_depth': max_depth, 'eta': 0.01}
     params = dataset.set_params(params)
-    model = xgb.train(params, dmat, num_rounds, [(dmat, 'train')])
+    model = xgb.train(params, dmat, num_rounds, evals=[(dmat, 'train')])
     return model
 
 
@@ -64,33 +84,34 @@ def get_model_stats(model):
 
 
 class Model:
-    def __init__(self, name, dataset, num_rounds, max_depth):
+    def __init__(
+        self, name: str, dataset: TestDataset, num_rounds: int, max_depth: int
+    ) -> None:
         self.name = name
         self.dataset = dataset
         self.num_rounds = num_rounds
         self.max_depth = max_depth
         print("Training " + name)
         self.xgb_model = train_model(dataset, max_depth, num_rounds)
-        self.num_trees, self.num_leaves, self.average_depth = get_model_stats(self.xgb_model)
+        self.num_trees, self.num_leaves, self.average_depth = get_model_stats(
+            self.xgb_model
+        )
 
 
 def check_accuracy(shap, margin):
-    if len(shap.shape) == 2:
-        sum = np.sum(shap, axis=len(shap.shape) - 1)
-    else:
-        sum = np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2))
+    shap = np.sum(shap, axis=tuple(range(np.ndim(margin), np.ndim(shap))))
 
-    if not np.allclose(sum, margin, 1e-1, 1e-1):
+    if not np.allclose(shap, margin, 1e-1, 1e-1):
         print("Warning: Failed 1e-1 accuracy")
 
 
-def get_models(model):
+def get_models(model: str) -> list[Model]:
     test_datasets = [
+        TestDataset("adult", fetch_adult(), "binary:logistic"),
         TestDataset("covtype", datasets.fetch_covtype(return_X_y=True), "multi:softmax"),
         TestDataset("cal_housing", datasets.fetch_california_housing(return_X_y=True),
                     "reg:squarederror"),
         TestDataset("fashion_mnist", fetch_fashion_mnist(), "multi:softmax"),
-        TestDataset("adult", fetch_adult(), "binary:logistic"),
     ]
 
     models = []
@@ -110,24 +131,37 @@ def get_models(model):
 def print_model_stats(models, args):
     # get model statistics
     models_df = pd.DataFrame(
-        columns=["model", "num_rounds", "num_trees", "num_leaves", "max_depth", "average_depth"])
-    for m in models:
-        models_df = models_df.append(
-            {"model": m.name, "num_rounds": m.num_rounds, "num_trees": m.num_trees,
-             "num_leaves": m.num_leaves, "max_depth": m.max_depth,
-             "average_depth": m.average_depth},
-            ignore_index=True)
+        columns=[
+            "model",
+            "num_rounds",
+            "num_trees",
+            "num_leaves",
+            "max_depth",
+            "average_depth",
+        ]
+    )
+    for m in models:
+        df = pd.DataFrame.from_dict(
+            {
+                "model": [m.name],
+                "num_rounds": [m.num_rounds],
+                "num_trees": [m.num_trees],
+                "num_leaves": [m.num_leaves],
+                "max_depth": [m.max_depth],
+                "average_depth": [m.average_depth],
+            }
+        )
+        models_df = pd.concat([models_df, df], ignore_index=True)
     print(models_df)
     print("Writing model statistics to: " + args.out_models)
     models_df.to_csv(args.out_models, index=False)
 
 
-def run_benchmark(args):
-    models = get_models(args)
+def run_benchmark(args: argparse.Namespace) -> None:
+    models = get_models(args.model)
     print_model_stats(models, args)
 
-    predictors = ["cpu_predictor", "gpu_predictor"]
-    # predictors = ["gpu_predictor"]
+    devices = ["cpu", "gpu"]
     test_rows = args.nrows
     df = pd.DataFrame(
         columns=["model", "test_rows", "cpu_time(s)", "cpu_std", "gpu_time(s)", "gpu_std",
@@ -135,8 +169,8 @@ def run_benchmark(args):
     for m in models:
         dtest = m.dataset.get_test_dmat(test_rows)
         result_row = {"model": m.name, "test_rows": test_rows, "cpu_time(s)": 0.0}
-        for p in predictors:
-            m.xgb_model.set_param({"predictor": p})
+        for p in devices:
+            m.xgb_model.set_param({"device": p})
             samples = []
             for i in range(args.niter):
                 start = time.perf_counter()
@@ -145,7 +179,7 @@ def run_benchmark(args):
                 else:
                     xgb_shap = m.xgb_model.predict(dtest, pred_contribs=True)
                 samples.append(time.perf_counter() - start)
-            if p is "gpu_predictor":
+            if p == "gpu":
                 result_row["gpu_time(s)"] = np.mean(samples)
                 result_row["gpu_std"] = np.std(samples)
             else:
@@ -156,8 +190,7 @@ def run_benchmark(args):
             check_accuracy(xgb_shap, margin)
 
         result_row["speedup"] = result_row["cpu_time(s)"] / result_row["gpu_time(s)"]
-        df = df.append(result_row,
-                       ignore_index=True)
+        df = pd.concat([df, pd.DataFrame.from_records([result_row])], ignore_index=True)
         print(df)
     print("Writing results to: " + args.out)
     df.to_csv(args.out, index=False)
diff --git a/dependencies.yaml b/dependencies.yaml
index 333b122..360a5f4 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -45,6 +45,10 @@ dependencies:
               cuda: "12.2"
             packages:
               - cuda-version=12.2
+          - matrix:
+              cuda: "12.5"
+            packages:
+              - cuda-version=12.5
   cuda:
     specific:
       - output_types: conda
diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake
index f494812..8e2ef84 100644
--- a/fetch_rapids.cmake
+++ b/fetch_rapids.cmake
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/GPUTREESHAP_RAPIDS.cmake)
-  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/RAPIDS.cmake
+  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.10/RAPIDS.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/GPUTREESHAP_RAPIDS.cmake
   )
 endif()