Commit

Merge branch 'branch-24.10' of github.com:rapidsai/cudf into test/remove-xfail-torch-integration-test

Matt711 committed Sep 5, 2024
2 parents 54b60de + 949f171 commit 163fa64
Showing 75 changed files with 1,548 additions and 601 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yaml
@@ -71,6 +71,8 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
with:
# build for every combination of arch and CUDA version, but only for the latest Python
matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
sha: ${{ inputs.sha }}
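
A note on the matrix_filter expression above: it groups the build matrix by architecture and CUDA major version, then keeps only the entry with the highest Python version in each group, which is exactly what the new comment promises. A minimal sketch of the effect, run against a hypothetical three-entry matrix (the real matrix is supplied by rapidsai/shared-workflows):

echo '[
  {"ARCH": "amd64", "CUDA_VER": "12.5.1", "PY_VER": "3.10"},
  {"ARCH": "amd64", "CUDA_VER": "12.5.1", "PY_VER": "3.11"},
  {"ARCH": "arm64", "CUDA_VER": "11.8.0", "PY_VER": "3.10"}
]' | jq 'group_by([.ARCH, (.CUDA_VER | split(".") | map(tonumber) | .[0])])
         | map(max_by(.PY_VER | split(".") | map(tonumber)))'
# Keeps amd64/CUDA 12 + Python 3.11 and arm64/CUDA 11 + Python 3.10; the
# amd64/CUDA 12 + Python 3.10 entry is dropped. Splitting versions into
# arrays of numbers makes the comparison numeric, so "3.11" sorts above "3.9".
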
14 changes: 11 additions & 3 deletions .github/workflows/pr.yaml
@@ -56,14 +56,21 @@ jobs:
- name: Checkout code repo
uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
fetch-depth: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).commits }}
fetch-depth: 0
persist-credentials: false
- name: Calculate merge base
id: calculate-merge-base
env:
PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
run: |
(echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") > "$GITHUB_OUTPUT"
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v45
with:
base_sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
files_yaml: |
cpp:
- '**'
@@ -190,6 +197,7 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
with:
# build for every combination of arch and CUDA version, but only for the latest Python
matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
build_type: pull-request
script: "ci/build_wheel_libcudf.sh"
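
The new "Calculate merge base" step pins the changed-files comparison to the PR's fork point rather than the current tip of the base branch, which can drift after the PR branches; it is also why the checkout now uses fetch-depth: 0, since git merge-base needs enough history to reach the common ancestor. A sketch of the underlying git behavior, using a made-up PR number:

# Hypothetical refs; in the workflow the SHAs come from the get-pr-info step.
git fetch origin branch-24.10
BASE_SHA=$(git rev-parse origin/branch-24.10)
git fetch origin refs/pull/1234/head
PR_SHA=$(git rev-parse FETCH_HEAD)
# merge-base walks both histories to the nearest common ancestor; diffing
# against it sees only the PR's own changes.
git merge-base "$BASE_SHA" "$PR_SHA"
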
1 change: 1 addition & 0 deletions .github/workflows/test.yaml
@@ -132,6 +132,7 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
container_image: "rapidsai/ci-conda:latest"
run_script: |
ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
13 changes: 12 additions & 1 deletion ci/cudf_pandas_scripts/run_tests.sh
@@ -54,8 +54,19 @@ else
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

# echo to expand wildcard before adding `[extra]` requires for pip
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
# `test_python` constraints are for `[test]` not `[cudf-pandas-tests]`
rapids-dependency-file-generator \
--output requirements \
--file-key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,cudf-pandas-tests]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
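
How the constraint mechanism behaves: pip never installs anything listed only in a constraints file; it just caps the version when another requirement pulls the package in. The unconditional `echo "" > ./constraints.txt` guarantees the file exists even when the job is not "oldest", so the pip invocation above can pass --constraint unconditionally. A minimal sketch with a made-up pin (not the real floor from test_python):

echo "numpy==1.23.0" > ./constraints.txt   # hypothetical oldest-supported pin
python -m pip install --constraint ./constraints.txt "cudf-cu12[test]"
# Everything cudf-cu12[test] depends on is still installed, but any package
# that also appears in constraints.txt resolves to exactly the pinned version.
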
ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh (renamed from ci_run_library_tests.sh)
@@ -9,23 +9,17 @@ cleanup() {

trap cleanup EXIT

runtest_gold() {
runtest() {
local lib=$1
local mode=$2

pytest \
-v \
--continue-on-collection-errors \
--cache-clear \
--numprocesses=${NUM_PROCESSES} \
--dist=worksteal \
${TEST_DIR}/test_${lib}*.py
}

runtest_cudf_pandas() {
local lib=$1
local plugin=""
if [ "$mode" = "cudf" ]; then
plugin="-p cudf.pandas"
fi

pytest \
-p cudf.pandas \
$plugin \
-v \
--continue-on-collection-errors \
--cache-clear \
@@ -38,8 +32,8 @@ main() {
local lib=$1

# generation phase
runtest_gold ${lib}
runtest_cudf_pandas ${lib}
runtest ${lib} "gold"
runtest ${lib} "cudf"

# assertion phase
pytest \
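
Pieced together from the hunks above, the refactor collapses runtest_gold and runtest_cudf_pandas into a single runner whose only difference is whether the cudf.pandas pytest plugin is loaded. A condensed sketch of the resulting shape (pytest flags abbreviated):

runtest() {
  local lib=$1
  local mode=$2
  local plugin=""
  if [ "$mode" = "cudf" ]; then
    plugin="-p cudf.pandas"
  fi
  # $plugin is left unquoted so an empty value expands to nothing
  pytest $plugin -v --cache-clear ${TEST_DIR}/test_${lib}*.py
}

runtest ${lib} "gold"   # plain pandas, producing the gold results
runtest ${lib} "cudf"   # same tests with pandas accelerated by cudf.pandas
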
2 changes: 1 addition & 1 deletion ci/cudf_pandas_scripts/third-party-integration/test.sh
@@ -72,7 +72,7 @@ main() {
fi
done

TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh ${lib}
TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh ${lib}

rapids-logger "Test script exiting with value: ${EXITCODE}"
done
3 changes: 2 additions & 1 deletion ci/test_python_common.sh
@@ -14,7 +14,8 @@ ENV_YAML_DIR="$(mktemp -d)"
rapids-dependency-file-generator \
--output conda \
--file-key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test

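
The --matrix string leans on shell parameter expansion: ${RAPIDS_CUDA_VERSION%.*} deletes the shortest trailing ".suffix", reducing a full CUDA version to major.minor. A quick illustration with hypothetical values:

RAPIDS_CUDA_VERSION="12.5.1"
echo "${RAPIDS_CUDA_VERSION%.*}"   # -> 12.5 (the patch component is stripped)
echo "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=3.11;dependencies=oldest"
# -> cuda=12.5;arch=x86_64;py=3.11;dependencies=oldest   (on an x86_64 runner)
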
14 changes: 14 additions & 0 deletions ci/test_wheel_cudf.sh
@@ -10,8 +10,22 @@ RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Install cudf, pylibcudf, and test requirements"

# Constrain to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_cudf \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]"
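
The "echo to expand wildcard" comment deserves unpacking: written together as ./dist/cudf_*.whl[test], the trailing [test] would be parsed by the shell as a glob character class and the pattern would match nothing, while quoting the whole string suppresses globbing entirely — and pip does not expand wildcards itself. $(echo ...) lets the shell resolve the wildcard first, after which the extra is appended to the concrete filename. A sketch with a hypothetical wheel name:

ls ./dist
# cudf_cu12-24.10.0-cp311-cp311-manylinux_2_28_x86_64.whl   (made-up name)
python -m pip install "$(echo ./dist/cudf_cu12-*.whl)[test]"
# Expands to: python -m pip install ./dist/cudf_cu12-24.10.0-...whl[test]
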
11 changes: 11 additions & 0 deletions ci/test_wheel_cudf_polars.sh
@@ -25,9 +25,20 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Installing cudf_polars and its dependencies"
# Constrain to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_cudf_polars \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
13 changes: 13 additions & 0 deletions ci/test_wheel_dask_cudf.sh
@@ -11,8 +11,21 @@ RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements"
# Constrain to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_dask_cudf \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
2 changes: 1 addition & 1 deletion cpp/include/cudf/column/column_factories.hpp
@@ -469,7 +469,7 @@ std::unique_ptr<column> make_strings_column(size_type num_strings,
* offsets (depth 1) {0, 2, 5, 7}
* data (depth 1)
* offsets (depth 2)
* data (depth 1) {1, 2, 3, 4, 5, 6, 7}
* data (depth 2) {1, 2, 3, 4, 5, 6, 7}
* @endcode
*
* @param[in] num_rows The number of lists the column represents.
48 changes: 35 additions & 13 deletions cpp/src/io/json/read_json.cu
@@ -138,14 +138,14 @@ datasource::owning_buffer<rmm::device_buffer> get_record_range_raw_input(
auto should_load_all_sources = !chunk_size || chunk_size >= total_source_size - chunk_offset;
chunk_size = should_load_all_sources ? total_source_size - chunk_offset : chunk_size;

int const num_subchunks_prealloced = should_load_all_sources ? 0 : max_subchunks_prealloced;
int num_subchunks_prealloced = should_load_all_sources ? 0 : max_subchunks_prealloced;
std::size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size);

// The allocation for single source compressed input is estimated by assuming a ~4:1
// compression ratio. For uncompressed inputs, we can get a better estimate using the idea
// of subchunks.
auto constexpr header_size = 4096;
std::size_t const buffer_size =
std::size_t buffer_size =
reader_compression != compression_type::NONE
? total_source_size * estimated_compression_ratio + header_size
: std::min(total_source_size, chunk_size + num_subchunks_prealloced * size_per_subchunk) +
@@ -169,18 +169,40 @@
// Find next delimiter
std::int64_t next_delim_pos = -1;
std::size_t next_subchunk_start = chunk_offset + chunk_size;
while (next_subchunk_start < total_source_size && next_delim_pos < buffer_offset) {
buffer_offset += readbufspan.size();
readbufspan = ingest_raw_input(bufspan.last(buffer_size - buffer_offset),
sources,
reader_compression,
next_subchunk_start,
size_per_subchunk,
stream);
next_delim_pos = find_first_delimiter(readbufspan, '\n', stream) + buffer_offset;
if (next_delim_pos < buffer_offset) { next_subchunk_start += size_per_subchunk; }
while (next_delim_pos < buffer_offset) {
for (int subchunk = 0;
subchunk < num_subchunks_prealloced && next_delim_pos < buffer_offset &&
next_subchunk_start < total_source_size;
subchunk++) {
buffer_offset += readbufspan.size();
readbufspan = ingest_raw_input(bufspan.last(buffer_size - buffer_offset),
sources,
reader_compression,
next_subchunk_start,
size_per_subchunk,
stream);
next_delim_pos = find_first_delimiter(readbufspan, '\n', stream) + buffer_offset;
next_subchunk_start += size_per_subchunk;
}
if (next_delim_pos < buffer_offset) {
if (next_subchunk_start >= total_source_size) {
// If we have reached the end of the source list but the source does not terminate with a
// newline character
next_delim_pos = buffer_offset + readbufspan.size();
} else {
// Our buffer_size estimate is insufficient to read until the end of the line! We need to
// allocate more memory and try again!
num_subchunks_prealloced *= 2;
buffer_size = reader_compression != compression_type::NONE
? 2 * buffer_size
: std::min(total_source_size,
buffer_size + num_subchunks_prealloced * size_per_subchunk) +
num_extra_delimiters;
buffer.resize(buffer_size, stream);
bufspan = device_span<char>(reinterpret_cast<char*>(buffer.data()), buffer.size());
}
}
}
if (next_delim_pos < buffer_offset) next_delim_pos = buffer_offset + readbufspan.size();

return datasource::owning_buffer<rmm::device_buffer>(
std::move(buffer),
2 changes: 1 addition & 1 deletion cpp/src/join/mixed_join_kernel.cuh
@@ -130,7 +130,7 @@ void launch_mixed_join(table_device_view left_table,
int64_t shmem_size_per_block,
rmm::cuda_stream_view stream)
{
mixed_join<DEFAULT_JOIN_BLOCK_SIZE, true>
mixed_join<DEFAULT_JOIN_BLOCK_SIZE, has_nulls>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
left_table,
right_table,
2 changes: 1 addition & 1 deletion cpp/src/join/mixed_join_size_kernel.cuh
@@ -124,7 +124,7 @@ std::size_t launch_compute_mixed_join_output_size(
// Allocate storage for the counter used to get the size of the join output
rmm::device_scalar<std::size_t> size(0, stream, mr);

compute_mixed_join_output_size<DEFAULT_JOIN_BLOCK_SIZE, true>
compute_mixed_join_output_size<DEFAULT_JOIN_BLOCK_SIZE, has_nulls>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
left_table,
right_table,