Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimizing multi-source byte range reading in JSON reader #15396

Merged
merged 48 commits into from
Apr 30, 2024
Merged
Changes from 1 commit
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
697cf65
byte range reader improvement
shrshi Mar 26, 2024
115c2c6
subchunk size heuristic; multistream d2d copy; small logic fix
shrshi Mar 27, 2024
c99e4ef
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Mar 27, 2024
7f97196
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Mar 27, 2024
615f005
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 1, 2024
0ac251d
overhaul commit
shrshi Apr 4, 2024
5c21ee4
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 4, 2024
09641db
format fix
shrshi Apr 5, 2024
c186435
Merge branch 'byte-range-improvement' of github.com:shrshi/cudf into …
shrshi Apr 5, 2024
e912671
more fixes
shrshi Apr 5, 2024
8557cf9
cleanup
shrshi Apr 5, 2024
16f7e7f
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 5, 2024
3582358
fixes
shrshi Apr 5, 2024
90b5246
Merge branch 'byte-range-improvement' of github.com:shrshi/cudf into …
shrshi Apr 5, 2024
02a556d
cleanup
shrshi Apr 5, 2024
c7a2799
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 5, 2024
9901f97
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 9, 2024
a535136
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 10, 2024
7f44bf4
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 10, 2024
685127c
logic fix
shrshi Apr 15, 2024
ad9af4d
Merge branch 'byte-range-improvement' of github.com:shrshi/cudf into …
shrshi Apr 15, 2024
458bc67
fix to initial allocation for compressed file input
shrshi Apr 16, 2024
d080a5f
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 16, 2024
0e30c85
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 16, 2024
28fedd1
removed uniq ptrs, passing device uvecs directly
shrshi Apr 17, 2024
f1bf818
cleanup; so many fixes
shrshi Apr 17, 2024
f29f223
Merge branch 'byte-range-improvement' of github.com:shrshi/cudf into …
shrshi Apr 17, 2024
21f07c2
merge
shrshi Apr 17, 2024
0066f2e
Merge branch 'branch-24.06' of github.com:rapidsai/cudf into byte-ran…
shrshi Apr 18, 2024
6032949
partially addressing PR reviews
shrshi Apr 18, 2024
c503e33
addressing pr reviews
shrshi Apr 18, 2024
5004f0c
formatting
shrshi Apr 18, 2024
a1fe36b
reducing memalloc for whole file read
shrshi Apr 22, 2024
6c99591
merge
shrshi Apr 22, 2024
e4c04cd
addressing PR reviews
shrshi Apr 22, 2024
79fa4f3
partially address PR reviews
shrshi Apr 22, 2024
1b0c8f8
docs fix
shrshi Apr 23, 2024
5dc53d8
addressing PR reviews
shrshi Apr 23, 2024
d29fdf8
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 23, 2024
bd18397
fix
shrshi Apr 24, 2024
a5e49af
Merge branch 'byte-range-improvement' of github.com:shrshi/cudf into …
shrshi Apr 24, 2024
54daff2
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 25, 2024
9075159
partially addressing reviews
shrshi Apr 29, 2024
7d826af
PR reviews
shrshi Apr 29, 2024
fb9fdae
Merge branch 'byte-range-improvement' of github.com:shrshi/cudf into …
shrshi Apr 29, 2024
032a5da
Merge branch 'branch-24.06' into byte-range-improvement
shrshi Apr 29, 2024
329f9ae
adding consts
shrshi Apr 30, 2024
df68938
Merge branch 'byte-range-improvement' of github.com:shrshi/cudf into …
shrshi Apr 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
subchunk size heuristic; multistream d2d copy; small logic fix
  • Loading branch information
shrshi committed Mar 27, 2024
commit 115c2c6375e3ba402d73bd17f18c898cff55ef43
28 changes: 21 additions & 7 deletions cpp/src/io/json/read_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "read_json.hpp"

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/stream_pool.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/io/detail/json.hpp>
#include <cudf/utilities/error.hpp>
Expand Down Expand Up @@ -184,12 +185,14 @@ auto get_record_range_raw_input(host_span<std::unique_ptr<datasource>> sources,
total_source_size) {
// Find next delimiter
/*
* TODO: is there a good heuristic to set the subchunk size? Setting number of subchunks per
* byte_range_size could be bad if the range size is large.
* NOTE: heuristic for choosing subchunk size: geometric mean of the minimum subchunk size (set to
* 10kb) and the byte range size divided by num_subchunks
*/
std::int64_t next_delim_pos = -1;
constexpr int num_subchunks = 10; // per byte_range_size
size_t size_per_subchunk = reader_opts.get_byte_range_size() / num_subchunks;
auto geometric_mean = [](double a, double b) { return std::pow(a * b, 0.5); };
size_t size_per_subchunk =
geometric_mean(reader_opts.get_byte_range_size() / num_subchunks, 10000);
size_t next_subchunk_start =
reader_opts.get_byte_range_offset() + reader_opts.get_byte_range_size();
std::vector<rmm::device_uvector<char>> subchunk_buffers;
Expand All @@ -206,11 +209,22 @@ auto get_record_range_raw_input(host_span<std::unique_ptr<datasource>> sources,
merged.resize(
vuule marked this conversation as resolved.
Show resolved Hide resolved
cur_chunk_buf.size() + ((subchunk_buffers.size() - 1) * size_per_subchunk) + next_delim_pos,
stream);
size_t offset = cur_chunk_buf.size();
// TODO: Can do this with a stream pool?
for (size_t i = 0; i < subchunk_buffers.size() - 1; i++) {
size_t offset = cur_chunk_buf.size() - first_delim_pos;
if (subchunk_buffers.size() >= 3) {
vuule marked this conversation as resolved.
Show resolved Hide resolved
std::vector<rmm::cuda_stream_view> copy_streams =
cudf::detail::fork_streams(stream, subchunk_buffers.size() - 1);
for (size_t i = 0; i < subchunk_buffers.size() - 1; i++) {
CUDF_CUDA_TRY(cudaMemcpyAsync(merged.data() + offset,
subchunk_buffers[i].data(),
size_per_subchunk,
cudaMemcpyDeviceToDevice,
copy_streams[i]));
offset += size_per_subchunk;
}
cudf::detail::join_streams(copy_streams, stream);
} else if (subchunk_buffers.size() == 2) {
CUDF_CUDA_TRY(cudaMemcpyAsync(merged.data() + offset,
subchunk_buffers[i].data(),
subchunk_buffers[0].data(),
size_per_subchunk,
cudaMemcpyDeviceToDevice,
stream));
Expand Down
Loading