Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
76 commits
Select commit Hold shift + click to select a range
659e332
Add large bit_width unpack64 tests
AntoinePrv Sep 16, 2025
45767c9
Handle 16bit unpacking generation
AntoinePrv Sep 3, 2025
a62ba1c
Use uint8_t* input for simd unpack
AntoinePrv Sep 16, 2025
b9d3a20
Gen: regenerate bpacking_simd
AntoinePrv Sep 16, 2025
c44ad17
Exclude Python codegen from doxygen
AntoinePrv Sep 16, 2025
eccd09a
Make generic scalar unpacking codegen
AntoinePrv Sep 16, 2025
9a5c254
Gen: regenerate scalar unpack function in single file
AntoinePrv Sep 16, 2025
23e7268
Simplify scalar_unpack increments
AntoinePrv Sep 16, 2025
cffc9d4
Gen: regenerate bpacking_scalar
AntoinePrv Sep 16, 2025
3abbc15
Try: reinterpret cast
AntoinePrv Sep 18, 2025
b9afb28
Revert: reinterpret cast
AntoinePrv Sep 18, 2025
7f96639
Simplify simd generator code
AntoinePrv Sep 18, 2025
ded98a4
Gen: regenerate simd files
AntoinePrv Sep 18, 2025
cbb57ad
Use templated method in SimdUnpacker
AntoinePrv Sep 18, 2025
23ed46f
Gen: regenerate simd files
AntoinePrv Sep 18, 2025
325cd9f
Slight improvement to SIMD codegen
AntoinePrv Sep 18, 2025
619d2a0
Use template functions in scalar codegen and factor dispatch
AntoinePrv Sep 18, 2025
1b48392
Gen: regenerate unpack files
AntoinePrv Sep 18, 2025
baf4a5a
Try new simd scheme
AntoinePrv Sep 19, 2025
faf8fac
Fix template specialization
AntoinePrv Sep 19, 2025
1e4b851
Gen: regenerate unpack files
AntoinePrv Sep 19, 2025
2913f5e
Only generate simd 32
AntoinePrv Sep 19, 2025
44dead1
Gen: regenerate unpack files
AntoinePrv Sep 19, 2025
642d252
Add SSE4.2 instantiation to bpacking
AntoinePrv Sep 19, 2025
ca9ce76
Try: new simd scheme
AntoinePrv Sep 19, 2025
122ae9c
WIP: new simd algo
AntoinePrv Sep 19, 2025
8bc3da9
WIP
AntoinePrv Sep 22, 2025
cf20fd3
Struct unpacker for width and no generate null/full
AntoinePrv Sep 23, 2025
8698c6e
Gen: regenerate unpack files
AntoinePrv Sep 23, 2025
c4c3fb6
WIP simd unpack
AntoinePrv Sep 24, 2025
899c630
Generate scalar 16 bit unpacking
AntoinePrv Sep 24, 2025
c9a73fd
Move scalar unpack functions to their own file
AntoinePrv Sep 24, 2025
5a84651
Gen: regenerate unpack files
AntoinePrv Sep 24, 2025
28fc099
Test all width
AntoinePrv Sep 24, 2025
52966d4
Add sse2 file
AntoinePrv Sep 24, 2025
4ac79f9
Simplify bpacking files
AntoinePrv Sep 24, 2025
691cac2
Add simd 16 unpack functions
AntoinePrv Sep 24, 2025
96f3e44
Gen: regenerate unpack files
AntoinePrv Sep 24, 2025
dd6fc09
Remove unpack16_avx512
AntoinePrv Sep 25, 2025
ae578d4
Gen: regenerate avx512 file
AntoinePrv Sep 25, 2025
90d81d3
Add missing header
AntoinePrv Sep 25, 2025
95b7acc
Add simd 64
AntoinePrv Sep 25, 2025
a7d8d33
Gen: regenerate simd files
AntoinePrv Sep 25, 2025
391cef5
Missing 64 impl
AntoinePrv Sep 25, 2025
205e274
BENCHMARK ALL
AntoinePrv Sep 25, 2025
8ffa9c7
Adjust used unpack functions
AntoinePrv Sep 25, 2025
4a85358
use constexpr jump table
AntoinePrv Sep 25, 2025
e8a7afd
Reduce number of displayed tests
AntoinePrv Sep 25, 2025
71cc469
Fix array CTAD
AntoinePrv Sep 25, 2025
95ab887
Revert "BENCHMARK ALL"
AntoinePrv Sep 25, 2025
a081014
Fix missing header guard
AntoinePrv Sep 25, 2025
494cd4e
Use template for public unpack functions
AntoinePrv Sep 25, 2025
78b730a
Fix template instantiation declaration
AntoinePrv Sep 25, 2025
3e748da
Use template for internal unpack functions
AntoinePrv Sep 26, 2025
b950b07
Fix UB
AntoinePrv Sep 26, 2025
d4b4d71
Use unpack<uint16_t> in BitReader
AntoinePrv Sep 26, 2025
06e28f2
Gen: regenerate files
AntoinePrv Oct 13, 2025
756b95a
Generate unpack16 function calling unpack32
AntoinePrv Oct 13, 2025
503e646
Use unpack16 simd functions
AntoinePrv Oct 13, 2025
3dd8f99
Add unpack_avx512<uint16_t>
AntoinePrv Oct 13, 2025
831e5d0
Add unpack8
AntoinePrv Oct 13, 2025
4c9a2e9
Gen: regenerate files
AntoinePrv Oct 13, 2025
10263d7
Use unpack functions universally
AntoinePrv Oct 13, 2025
7e5fffb
Remove unpack on SSE4.2
AntoinePrv Oct 13, 2025
1f963dc
Avoid namespace conflict
AntoinePrv Oct 13, 2025
1cbe7cb
Try manual jump table
AntoinePrv Oct 14, 2025
d9e6f55
Update gitattribute
AntoinePrv Oct 14, 2025
737ef05
Address review comments
AntoinePrv Oct 14, 2025
5f5ec52
Add anonymous namespace
AntoinePrv Oct 14, 2025
a40b87c
Add unpack<bool>
AntoinePrv Oct 14, 2025
4b7a83b
Rename test
AntoinePrv Oct 15, 2025
3821a13
Rename simd_min > simd_default
AntoinePrv Oct 15, 2025
28e5abe
Address reviewer comments
AntoinePrv Oct 15, 2025
298a1ea
Size of bool may not be one
AntoinePrv Oct 16, 2025
5926c53
Add half width comment
AntoinePrv Oct 16, 2025
38aa052
Refine comment
pitrou Oct 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cpp/src/arrow/util/bpacking_*_generated.h linguist-generated=true
cpp/src/arrow/util/bpacking_*_generated_internal.h linguist-generated=true
cpp/src/parquet/chunker_*_generated.h linguist-generated=true
cpp/src/generated/*.cpp linguist-generated=true
cpp/src/generated/*.h linguist-generated=true
Expand Down
1 change: 1 addition & 0 deletions cpp/apidoc/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -1095,6 +1095,7 @@ EXCLUDE_PATTERNS = *-test.cc \
*test* \
*_generated.h \
*-benchmark.cc \
*_codegen.py \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised, does Doxygen actually look at *.py files?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was surprised too, but Doxygen was giving me trouble with it

*internal*

# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
Expand Down
9 changes: 4 additions & 5 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,8 @@ set(ARROW_UTIL_SRCS
util/bitmap_builders.cc
util/bitmap_ops.cc
util/bpacking.cc
util/bpacking_scalar.cc
util/bpacking_simd_default.cc
util/byte_size.cc
util/byte_stream_split_internal.cc
util/cancel.cc
Expand Down Expand Up @@ -533,11 +535,8 @@ set(ARROW_UTIL_SRCS

append_runtime_avx2_src(ARROW_UTIL_SRCS util/byte_stream_split_internal_avx2.cc)

append_runtime_avx2_src(ARROW_UTIL_SRCS util/bpacking_avx2.cc)
append_runtime_avx512_src(ARROW_UTIL_SRCS util/bpacking_avx512.cc)
if(ARROW_HAVE_NEON)
list(APPEND ARROW_UTIL_SRCS util/bpacking_neon.cc)
endif()
append_runtime_avx2_src(ARROW_UTIL_SRCS util/bpacking_simd_avx2.cc)
append_runtime_avx512_src(ARROW_UTIL_SRCS util/bpacking_simd_avx512.cc)

if(ARROW_WITH_BROTLI)
list(APPEND ARROW_UTIL_SRCS util/compression_brotli.cc)
Expand Down
59 changes: 18 additions & 41 deletions cpp/src/arrow/util/bit_stream_utils_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,18 @@ inline bool BitReader::GetValue(int num_bits, T* v) {
return GetBatch(num_bits, v, 1) == 1;
}

namespace internal_bit_reader {
/// Type trait selecting the element type that BitReader::GetBatch casts its
/// output pointer to before calling the generic unpack routine.
///
/// For every T other than bool, the nested `type` is the unsigned counterpart
/// of T, taken from std::make_unsigned.
template <typename T>
struct unpack_detect : std::make_unsigned<T> {};

/// bool is handled separately because std::make_unsigned is not defined for
/// it; the nested `type` is bool itself.
template <>
struct unpack_detect<bool> {
  using type = bool;
};
}  // namespace internal_bit_reader

template <typename T>
inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
ARROW_DCHECK(buffer_ != NULL);
Expand Down Expand Up @@ -323,47 +335,12 @@ inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
}
}

if (sizeof(T) == 4) {
int num_unpacked =
internal::unpack32(buffer + byte_offset, reinterpret_cast<uint32_t*>(v + i),
batch_size - i, num_bits);
i += num_unpacked;
byte_offset += num_unpacked * num_bits / 8;
} else if (sizeof(T) == 8 && num_bits > 32) {
// Use unpack64 only if num_bits is larger than 32
// TODO (ARROW-13677): improve the performance of internal::unpack64
// and remove the restriction of num_bits
int num_unpacked =
internal::unpack64(buffer + byte_offset, reinterpret_cast<uint64_t*>(v + i),
batch_size - i, num_bits);
i += num_unpacked;
byte_offset += num_unpacked * num_bits / 8;
} else {
// TODO: revisit this limit if necessary
ARROW_DCHECK_LE(num_bits, 32);
const int buffer_size = 1024;
uint32_t unpack_buffer[buffer_size];
while (i < batch_size) {
int unpack_size = std::min(buffer_size, batch_size - i);
int num_unpacked =
internal::unpack32(buffer + byte_offset, unpack_buffer, unpack_size, num_bits);
if (num_unpacked == 0) {
break;
}
for (int k = 0; k < num_unpacked; ++k) {
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable : 4800)
#endif
v[i + k] = static_cast<T>(unpack_buffer[k]);
#ifdef _MSC_VER
# pragma warning(pop)
#endif
}
i += num_unpacked;
byte_offset += num_unpacked * num_bits / 8;
}
}
using unpack_t = typename internal_bit_reader::unpack_detect<T>::type;

int num_unpacked = ::arrow::internal::unpack(
buffer + byte_offset, reinterpret_cast<unpack_t*>(v + i), batch_size - i, num_bits);
i += num_unpacked;
byte_offset += num_unpacked * num_bits / 8;

buffered_values =
detail::ReadLittleEndianWord(buffer + byte_offset, max_bytes - byte_offset);
Expand Down
Loading
Loading