Skip to content

Commit

Permalink
[improvement](bitshuffle)Enable avx512 support in bitshuffle for perf…
Browse files Browse the repository at this point in the history
…ormance boost

Signed-off-by: Wu, Kaiqiang <kaiqiang.wu@intel.com>
Co-authored-by: vesslanjin <jun.i.jin@intel.com>
  • Loading branch information
HackToday and vesslanjin committed Jan 16, 2023
1 parent fa03c8a commit 901867e
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 6 deletions.
4 changes: 4 additions & 0 deletions be/src/gutil/cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ CPU::CPU()
has_popcnt_(false),
has_avx_(false),
has_avx2_(false),
has_avx512_(false),
has_aesni_(false),
has_non_stop_time_stamp_counter_(false),
is_running_in_vm_(false),
Expand Down Expand Up @@ -201,6 +202,8 @@ void CPU::Initialize() {
(xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
// AVX-512 is only usable when the CPU advertises it AND the OS has enabled
// the corresponding register state. The xgetbv(0) mask 0xE6 requires bits
// 1-2 (SSE/AVX state) and bits 5-7 (opmask, ZMM_Hi256, Hi16_ZMM state);
// without the latter the first AVX-512 instruction would fault even though
// the CPUID feature bits are set.
// has_avx2_ is tested first so xgetbv() is only reached on CPUs that already
// passed the AVX checks.
// The cpu_info7[1] bits tested are 16, 30 and 31 (presumably CPUID.7:EBX
// AVX512F, AVX512BW and AVX512VL -- confirm against how cpu_info7 is filled).
has_avx512_ = has_avx2_ && (xgetbv(0) & 0xE6) == 0xE6 &&
(cpu_info7[1] & 0x00010000) != 0 && (cpu_info7[1] & 0x40000000) != 0 &&
(cpu_info7[1] & 0x80000000) != 0;
}
// Get the brand string of the cpu.
__cpuid(cpu_info, 0x80000000);
Expand Down Expand Up @@ -253,6 +256,7 @@ void CPU::Initialize() {
#endif
}
CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
if (has_avx512()) return AVX512;
if (has_avx2()) return AVX2;
if (has_avx()) return AVX;
if (has_sse42()) return SSE42;
Expand Down
3 changes: 3 additions & 0 deletions be/src/gutil/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class CPU final {
SSE42,
AVX,
AVX2,
AVX512,
MAX_INTEL_MICRO_ARCHITECTURE
};
// Accessors for CPU information.
Expand All @@ -81,6 +82,7 @@ class CPU final {
bool has_popcnt() const { return has_popcnt_; }
bool has_avx() const { return has_avx_; }
bool has_avx2() const { return has_avx2_; }
// Whether AVX-512 support was detected; the flag is computed in
// CPU::Initialize().
bool has_avx512() const { return has_avx512_; }
bool has_aesni() const { return has_aesni_; }
bool has_non_stop_time_stamp_counter() const { return has_non_stop_time_stamp_counter_; }
bool is_running_in_vm() const { return is_running_in_vm_; }
Expand All @@ -107,6 +109,7 @@ class CPU final {
bool has_popcnt_;
bool has_avx_;
bool has_avx2_;
bool has_avx512_;
bool has_aesni_;
bool has_non_stop_time_stamp_counter_;
bool is_running_in_vm_;
Expand Down
15 changes: 14 additions & 1 deletion be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@
#undef bshuf_compress_lz4
#undef bshuf_decompress_lz4

// Declare the AVX-512 flavour of the bitshuffle entry points.
// BITSHUFFLE_H is undefined first (presumably the header's include guard --
// confirm) so that the header below is processed again instead of being
// skipped.
#undef BITSHUFFLE_H
// While the header is being included, every mention of the three entry points
// is rewritten to the `_avx512`-suffixed name, so the declarations it produces
// are for bshuf_*_avx512.
#define bshuf_compress_lz4_bound bshuf_compress_lz4_bound_avx512
#define bshuf_compress_lz4 bshuf_compress_lz4_avx512
#define bshuf_decompress_lz4 bshuf_decompress_lz4_avx512
#include <bitshuffle/bitshuffle.h> // NOLINT(*)
// Drop the renaming macros again so later code can use the plain names.
#undef bshuf_compress_lz4_bound
#undef bshuf_compress_lz4
#undef bshuf_decompress_lz4

using base::CPU;

namespace doris {
Expand All @@ -54,7 +63,11 @@ decltype(&bshuf_decompress_lz4) g_bshuf_decompress_lz4;
// the cost of a 'std::once' call.
__attribute__((constructor)) void SelectBitshuffleFunctions() {
#if (defined(__i386) || defined(__x86_64__))
if (CPU().has_avx2()) {
if (CPU().has_avx512()) {
g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx512;
g_bshuf_compress_lz4 = bshuf_compress_lz4_avx512;
g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx512;
} else if (CPU().has_avx2()) {
g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx2;
g_bshuf_compress_lz4 = bshuf_compress_lz4_avx2;
g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx2;
Expand Down
11 changes: 10 additions & 1 deletion thirdparty/build-thirdparty.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1039,7 +1039,7 @@ build_bitshuffle() {
# we still need to support non-AVX2-capable hardware. So, we build it once per
# entry in 'arches' below, each with its own compiler flags, and use some linker
# tricks to suffix the AVX2 and AVX-512 symbols with '_avx2' and '_avx512'.
arches=('default' 'avx2')
arches=('default' 'avx2' 'avx512')
MACHINE_TYPE="$(uname -m)"
# Because aarch64 doesn't support AVX2, disable it.
if [[ "${MACHINE_TYPE}" == "aarch64" || "${MACHINE_TYPE}" == 'arm64' ]]; then
Expand All @@ -1052,6 +1052,9 @@ build_bitshuffle() {
if [[ "${arch}" == "avx2" ]]; then
arch_flag="-mavx2"
fi
# AVX-512 build: compile with the AVX-512 F and BW instruction sets enabled.
if [[ "${arch}" == "avx512" ]]; then
    arch_flag="-mavx512bw -mavx512f"
fi
tmp_obj="bitshuffle_${arch}_tmp.o"
dst_obj="bitshuffle_${arch}.o"
"${CC}" ${EXTRA_CFLAGS:+${EXTRA_CFLAGS}} ${arch_flag:+${arch_flag}} -std=c99 "-I${PREFIX}/include/lz4" -O3 -DNDEBUG -c \
Expand All @@ -1077,6 +1080,12 @@ build_bitshuffle() {
echo "${sym} ${sym}_${arch}"
done >renames.txt
"${objcopy}" --redefine-syms=renames.txt "${tmp_obj}" "${dst_obj}"
elif [[ "${arch}" == "avx512" ]]; then
    # Create a mapping file with '<old_sym> <suffixed_sym>' on each line, then
    # rename every defined external symbol so that it carries the '_avx512' suffix.
    "${DORIS_BIN_UTILS}/nm" --defined-only --extern-only "${tmp_obj}" | while read -r addr type sym; do
        echo "${sym} ${sym}_${arch}"
    done >renames.txt
    "${DORIS_BIN_UTILS}/objcopy" --redefine-syms=renames.txt "${tmp_obj}" "${dst_obj}"
else
mv "${tmp_obj}" "${dst_obj}"
fi
Expand Down
8 changes: 4 additions & 4 deletions thirdparty/vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,10 @@ S2_SOURCE=s2geometry-0.10.0
S2_MD5SUM="c68f3c5d326dde9255681b9201393a9f"

# bitshuffle
BITSHUFFLE_DOWNLOAD="https://github.com/kiyo-masui/bitshuffle/archive/0.3.5.tar.gz"
BITSHUFFLE_NAME=bitshuffle-0.3.5.tar.gz
BITSHUFFLE_SOURCE=bitshuffle-0.3.5
BITSHUFFLE_MD5SUM="2648ec7ccd0b896595c6636d926fc867"
BITSHUFFLE_DOWNLOAD="https://github.com/kiyo-masui/bitshuffle/archive/0.5.1.tar.gz"
BITSHUFFLE_NAME=bitshuffle-0.5.1.tar.gz
BITSHUFFLE_SOURCE=bitshuffle-0.5.1
BITSHUFFLE_MD5SUM="b3bf6a9838927f7eb62214981c138e2f"

# croaringbitmap
CROARINGBITMAP_DOWNLOAD="https://github.com/RoaringBitmap/CRoaring/archive/refs/tags/v0.4.0.tar.gz"
Expand Down

0 comments on commit 901867e

Please sign in to comment.