Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-34535: [C++] Move ChunkResolver to the public API #40226

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions cpp/src/arrow/chunk_resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include "arrow/record_batch.h"

namespace arrow {
namespace internal {

namespace {
template <typename T>
Expand Down Expand Up @@ -65,5 +64,4 @@ ChunkResolver::ChunkResolver(const std::vector<const Array*>& chunks)
ChunkResolver::ChunkResolver(const RecordBatchVector& batches)
: offsets_(MakeChunksOffsets(batches)), cached_chunk_(0) {}

} // namespace internal
} // namespace arrow
22 changes: 11 additions & 11 deletions cpp/src/arrow/chunk_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,22 @@
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"

namespace arrow::internal {
namespace arrow {

struct ChunkLocation {
/// \brief Index of the chunk in the array of chunks
///
/// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used
/// to represent out-of-bounds locations.
int64_t chunk_index;
int64_t chunk_index = 0;

/// \brief Index of the value in the chunk
///
/// The value is undefined if chunk_index >= chunks.size()
int64_t index_in_chunk;
int64_t index_in_chunk = 0;
};

/// \class ChunkResolver
/// \brief An utility that incrementally resolves logical indices into
/// physical indices in a chunked array.
struct ARROW_EXPORT ChunkResolver {
Expand Down Expand Up @@ -75,8 +76,8 @@ struct ARROW_EXPORT ChunkResolver {
/// The returned ChunkLocation contains the chunk index and the within-chunk index
/// equivalent to the logical index.
///
/// \pre index >= 0
/// \post location.chunk_index in [0, chunks.size()]
/// \pre `index >= 0`
/// \post location.chunk_index in `[0, chunks.size()]`
/// \param index The logical index to resolve
/// \return ChunkLocation with a valid chunk_index if index is within
/// bounds, or with chunk_index == chunks.size() if logical index is
Expand All @@ -96,16 +97,15 @@ struct ARROW_EXPORT ChunkResolver {
/// \pre index >= 0
/// \post location.chunk_index in [0, chunks.size()]
/// \param index The logical index to resolve
/// \param cached_chunk_index 0 or the chunk_index of the last ChunkLocation
/// \param hint The last ChunkLocation or a default-intitialized ChunkLocation
/// returned by this ChunkResolver.
/// \return ChunkLocation with a valid chunk_index if index is within
/// bounds, or with chunk_index == chunks.size() if logical index is
/// `>= chunked_array.length()`.
inline ChunkLocation ResolveWithChunkIndexHint(int64_t index,
int64_t cached_chunk_index) const {
assert(cached_chunk_index < static_cast<int64_t>(offsets_.size()));
inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) const {
assert(hint.chunk_index < static_cast<int64_t>(offsets_.size()));
const auto chunk_index =
ResolveChunkIndex</*StoreCachedChunk=*/false>(index, cached_chunk_index);
ResolveChunkIndex</*StoreCachedChunk=*/false>(index, hint.chunk_index);
return {chunk_index, index - offsets_[chunk_index]};
}

Expand Down Expand Up @@ -165,4 +165,4 @@ struct ARROW_EXPORT ChunkResolver {
}
};

} // namespace arrow::internal
} // namespace arrow
2 changes: 1 addition & 1 deletion cpp/src/arrow/chunked_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class ARROW_EXPORT ChunkedArray {
private:
template <typename T, typename V>
friend class ::arrow::stl::ChunkedArrayIterator;
internal::ChunkResolver chunk_resolver_;
ChunkResolver chunk_resolver_;
ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
};

Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/compute/kernels/chunked_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,16 @@ struct ResolvedChunk<Array> {
bool IsNull() const { return array->IsNull(index); }
};

struct ChunkedArrayResolver : protected ::arrow::internal::ChunkResolver {
struct ChunkedArrayResolver : protected ::arrow::ChunkResolver {
ChunkedArrayResolver(const ChunkedArrayResolver& other)
: ::arrow::internal::ChunkResolver(other.chunks_), chunks_(other.chunks_) {}
: ::arrow::ChunkResolver(other.chunks_), chunks_(other.chunks_) {}

explicit ChunkedArrayResolver(const std::vector<const Array*>& chunks)
: ::arrow::internal::ChunkResolver(chunks), chunks_(chunks) {}
: ::arrow::ChunkResolver(chunks), chunks_(chunks) {}

template <typename ArrayType>
ResolvedChunk<ArrayType> Resolve(int64_t index) const {
const auto loc = ::arrow::internal::ChunkResolver::Resolve(index);
const auto loc = ::arrow::ChunkResolver::Resolve(index);
Comment on lines +64 to +73
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could actually remove the ::arrow now because the compile won't be confused by ::arrow::internal and ::arrow::compute::internal anymore.

return {checked_cast<const ArrayType*>(chunks_[loc.chunk_index]), loc.index_in_chunk};
}

Expand Down
21 changes: 7 additions & 14 deletions cpp/src/arrow/compute/kernels/vector_sort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
namespace arrow {

using internal::checked_cast;
using internal::ChunkLocation;

namespace compute {
namespace internal {
Expand Down Expand Up @@ -754,10 +753,8 @@ class TableSorter {
std::merge(nulls_begin, nulls_middle, nulls_middle, nulls_end, temp_indices,
[&](uint64_t left, uint64_t right) {
// First column is either null or nan
left_loc =
left_resolver_.ResolveWithChunkIndexHint(left, left_loc.chunk_index);
right_loc = right_resolver_.ResolveWithChunkIndexHint(
right, right_loc.chunk_index);
left_loc = left_resolver_.ResolveWithHint(left, left_loc);
right_loc = right_resolver_.ResolveWithHint(right, right_loc);
auto chunk_left = first_sort_key.GetChunk<ArrayType>(left_loc);
auto chunk_right = first_sort_key.GetChunk<ArrayType>(right_loc);
const auto left_is_null = chunk_left.IsNull();
Expand Down Expand Up @@ -793,10 +790,8 @@ class TableSorter {
std::merge(nulls_begin, nulls_middle, nulls_middle, nulls_end, temp_indices,
[&](uint64_t left, uint64_t right) {
// First column is always null
left_loc =
left_resolver_.ResolveWithChunkIndexHint(left, left_loc.chunk_index);
right_loc = right_resolver_.ResolveWithChunkIndexHint(
right, right_loc.chunk_index);
left_loc = left_resolver_.ResolveWithHint(left, left_loc);
right_loc = right_resolver_.ResolveWithHint(right, right_loc);
return comparator.Compare(left_loc, right_loc, 1);
});
// Copy back temp area into main buffer
Expand All @@ -821,10 +816,8 @@ class TableSorter {
std::merge(range_begin, range_middle, range_middle, range_end, temp_indices,
[&](uint64_t left, uint64_t right) {
// Both values are never null nor NaN.
left_loc =
left_resolver_.ResolveWithChunkIndexHint(left, left_loc.chunk_index);
right_loc = right_resolver_.ResolveWithChunkIndexHint(
right, right_loc.chunk_index);
left_loc = left_resolver_.ResolveWithHint(left, left_loc);
right_loc = right_resolver_.ResolveWithHint(right, right_loc);
auto chunk_left = first_sort_key.GetChunk<ArrayType>(left_loc);
auto chunk_right = first_sort_key.GetChunk<ArrayType>(right_loc);
DCHECK(!chunk_left.IsNull());
Expand Down Expand Up @@ -862,7 +855,7 @@ class TableSorter {
const RecordBatchVector batches_;
const SortOptions& options_;
const NullPlacement null_placement_;
const ::arrow::internal::ChunkResolver left_resolver_, right_resolver_;
const ::arrow::ChunkResolver left_resolver_, right_resolver_;
const std::vector<ResolvedSortKey> sort_keys_;
uint64_t* indices_begin_;
uint64_t* indices_end_;
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/kernels/vector_sort_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,10 +752,10 @@ struct ResolvedTableSortKey {
order(order),
null_count(null_count) {}

using LocationType = ::arrow::internal::ChunkLocation;
using LocationType = ::arrow::ChunkLocation;

template <typename ArrayType>
ResolvedChunk<ArrayType> GetChunk(::arrow::internal::ChunkLocation loc) const {
ResolvedChunk<ArrayType> GetChunk(::arrow::ChunkLocation loc) const {
return {checked_cast<const ArrayType*>(chunks[loc.chunk_index]), loc.index_in_chunk};
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/stl_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class ChunkedArrayIterator {
}

private:
arrow::internal::ChunkLocation GetChunkLocation(int64_t index) const {
arrow::ChunkLocation GetChunkLocation(int64_t index) const {
assert(chunked_array_);
return chunked_array_->chunk_resolver_.Resolve(index);
}
Expand Down
4 changes: 2 additions & 2 deletions r/src/altrep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,13 @@ class ArrowAltrepData {

const std::shared_ptr<ChunkedArray>& chunked_array() { return chunked_array_; }

arrow::internal::ChunkLocation locate(int64_t index) {
arrow::ChunkLocation locate(int64_t index) {
return resolver_.Resolve(index);
}
Comment on lines +90 to 92
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The linter wants this to be in a single line.


private:
std::shared_ptr<ChunkedArray> chunked_array_;
arrow::internal::ChunkResolver resolver_;
arrow::ChunkResolver resolver_;
};

// the ChunkedArray that is being wrapped by the altrep object
Expand Down