[runtime] Fixed bug in x86 polynomial hash
Forced the compiler to use the legacy SSE instruction form instead of the modern AVX form.
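
In other words: with a file-wide #pragma clang attribute push (__attribute__((target("avx2")))), clang compiles every function for AVX2, so even the SSE4.1 fallback path is emitted with VEX-encoded (AVX-form) instructions, which raise SIGILL on CPUs that support SSE4.1 but not AVX. Scoping the target attribute to each function keeps the SSE path in the legacy encoding. A minimal sketch of the before/after pattern (mulSse and mulAvx are illustrative names, not from the patch):

#include <immintrin.h>

// Before (simplified): one file-wide pragma forced AVX2 codegen everywhere,
// so the SSE fallback itself came out in VEX form (vpmulld) and crashed on
// pre-AVX hardware:
//
//   #pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
//   __m128i mulSse(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); }
//   #pragma clang attribute pop

// After: each function carries its own target, so the SSE path keeps the
// legacy encoding (pmulld) and runs on SSE4.1-only CPUs.
__attribute__((target("sse4.1")))
__m128i mulSse(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); }

__attribute__((target("avx2")))
__m256i mulAvx(__m256i x, __m256i y) { return _mm256_mullo_epi32(x, y); }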
homuroll committed Feb 9, 2021
1 parent 2064c6c commit e734b52
Showing 2 changed files with 30 additions and 39 deletions.
8 changes: 0 additions & 8 deletions runtime/src/main/cpp/polyhash/common.h
@@ -39,10 +39,6 @@ constexpr std::array<uint32_t, Count> RepeatingPowers(uint32_t base, uint8_t exp
     return result;
 }
 
-#if defined(__x86_64__) or defined(__i386__)
-#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
-#endif
-
 template<typename Traits>
 ALWAYS_INLINE void polyHashTail(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) {
     using VecType = typename Traits::VecType;
@@ -194,8 +190,4 @@ ALWAYS_INLINE void polyHashUnroll8(int& n, uint16_t const*& str, typename Traits
     res = Traits::vec128Add(res, Traits::vec128Add(sum1, sum2));
 }
 
-#if defined(__x86_64__) or defined(__i386__)
-#pragma clang attribute pop
-#endif
-
 #endif // RUNTIME_POLYHASH_COMMON_H
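
The push/pop pair can be dropped from common.h outright because everything between the two pragmas is an ALWAYS_INLINE template: each helper is force-inlined into a caller in x86.cpp that now carries its own target attribute, and inlined code is compiled with the caller's target features. A minimal sketch of that behavior (addTwice, sumSse and sumAvx are illustrative names, not from the patch):

#include <immintrin.h>

// No target attribute here: like the helpers in common.h, this function is
// force-inlined, so it is compiled with whatever ISA its caller targets.
__attribute__((always_inline)) static inline __m128i addTwice(__m128i v) {
    return _mm_add_epi32(v, v);
}

__attribute__((target("sse4.1")))
int sumSse(__m128i v) { return _mm_cvtsi128_si32(addTwice(v)); } // legacy paddd

__attribute__((target("avx2")))
int sumAvx(__m128i v) { return _mm_cvtsi128_si32(addTwice(v)); } // VEX vpaddd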
61 changes: 30 additions & 31 deletions runtime/src/main/cpp/polyhash/x86.cpp
@@ -8,9 +8,10 @@
 
 #if defined(__x86_64__) or defined(__i386__)
 
-#include <immintrin.h>
+#define __SSE41__ __attribute__((target("sse4.1")))
+#define __AVX2__ __attribute__((target("avx2")))
 
-#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
+#include <immintrin.h>
 
 namespace {
 
@@ -26,24 +27,24 @@ struct SSETraits {
     using Vec128Type = __m128i;
     using U16VecType = __m128i;
 
-    ALWAYS_INLINE static VecType initVec() { return _mm_setzero_si128(); }
-    ALWAYS_INLINE static Vec128Type initVec128() { return _mm_setzero_si128(); }
-    ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
-    ALWAYS_INLINE static VecType u16Load(U16VecType x) { return _mm_cvtepu16_epi32(x); }
-    ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
-    ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
-    ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return _mm_mullo_epi32(x, y); }
-    ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return _mm_add_epi32(x, y); }
-    ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) {
+    __SSE41__ static VecType initVec() { return _mm_setzero_si128(); }
+    __SSE41__ static Vec128Type initVec128() { return _mm_setzero_si128(); }
+    __SSE41__ static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
+    __SSE41__ static VecType u16Load(U16VecType x) { return _mm_cvtepu16_epi32(x); }
+    __SSE41__ static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
+    __SSE41__ static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
+    __SSE41__ static VecType vecMul(VecType x, VecType y) { return _mm_mullo_epi32(x, y); }
+    __SSE41__ static VecType vecAdd(VecType x, VecType y) { return _mm_add_epi32(x, y); }
+    __SSE41__ static Vec128Type squash2(VecType x, VecType y) {
         return squash1(_mm_hadd_epi32(x, y)); // [x0 + x1, x2 + x3, y0 + y1, y2 + y3]
     }
 
-    ALWAYS_INLINE static Vec128Type squash1(VecType z) {
+    __SSE41__ static Vec128Type squash1(VecType z) {
         VecType sum = _mm_hadd_epi32(z, z); // [z0 + z1, z2 + z3, z0 + z1, z2 + z3]
         return _mm_hadd_epi32(sum, sum);    // [z0..3, same, same, same]
     }
 
-    static int polyHashUnalignedUnrollUpTo8(int n, uint16_t const* str) {
+    __SSE41__ static int polyHashUnalignedUnrollUpTo8(int n, uint16_t const* str) {
         Vec128Type res = initVec128();
 
         polyHashUnroll2<SSETraits>(n, str, res, &b8[0], &p64[56]);
@@ -52,7 +53,7 @@ struct SSETraits {
         return vec128toInt(res);
     }
 
-    static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
+    __SSE41__ static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
         Vec128Type res = initVec128();
 
         polyHashUnroll4<SSETraits>(n, str, res, &b16[0], &p64[48]);
@@ -68,27 +69,27 @@ struct AVX2Traits {
     using Vec128Type = __m128i;
     using U16VecType = __m128i;
 
-    ALWAYS_INLINE static VecType initVec() { return _mm256_setzero_si256(); }
-    ALWAYS_INLINE static Vec128Type initVec128() { return _mm_setzero_si128(); }
-    ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
-    ALWAYS_INLINE static VecType u16Load(U16VecType x) { return _mm256_cvtepu16_epi32(x); }
-    ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
-    ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
-    ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return _mm256_mullo_epi32(x, y); }
-    ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return _mm256_add_epi32(x, y); }
-    ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) {
+    __AVX2__ static VecType initVec() { return _mm256_setzero_si256(); }
+    __AVX2__ static Vec128Type initVec128() { return _mm_setzero_si128(); }
+    __AVX2__ static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
+    __AVX2__ static VecType u16Load(U16VecType x) { return _mm256_cvtepu16_epi32(x); }
+    __AVX2__ static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
+    __AVX2__ static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
+    __AVX2__ static VecType vecMul(VecType x, VecType y) { return _mm256_mullo_epi32(x, y); }
+    __AVX2__ static VecType vecAdd(VecType x, VecType y) { return _mm256_add_epi32(x, y); }
+    __AVX2__ static Vec128Type squash2(VecType x, VecType y) {
         return squash1(_mm256_hadd_epi32(x, y)); // [x0 + x1, x2 + x3, y0 + y1, y2 + y3, x4 + x5, x6 + x7, y4 + y5, y6 + y7]
     }
 
-    ALWAYS_INLINE static Vec128Type squash1(VecType z) {
+    __AVX2__ static Vec128Type squash1(VecType z) {
         VecType sum = _mm256_hadd_epi32(z, z);            // [z0 + z1, z2 + z3, z0 + z1, z2 + z3, z4 + z5, z6 + z7, z4 + z5, z6 + z7]
         sum = _mm256_hadd_epi32(sum, sum);                // [z0..3, z0..3, z0..3, z0..3, z4..7, z4..7, z4..7, z4..7]
         Vec128Type lo = _mm256_extracti128_si256(sum, 0); // [z0..3, same, same, same]
         Vec128Type hi = _mm256_extracti128_si256(sum, 1); // [z4..7, same, same, same]
         return _mm_add_epi32(lo, hi);                     // [z0..7, same, same, same]
     }
 
-    static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
+    __AVX2__ static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
         Vec128Type res = initVec128();
 
         polyHashUnroll2<AVX2Traits>(n, str, res, &b16[0], &p64[48]);
@@ -98,7 +99,7 @@ struct AVX2Traits {
         return vec128toInt(res);
     }
 
-    static int polyHashUnalignedUnrollUpTo32(int n, uint16_t const* str) {
+    __AVX2__ static int polyHashUnalignedUnrollUpTo32(int n, uint16_t const* str) {
         Vec128Type res = initVec128();
 
         polyHashUnroll4<AVX2Traits>(n, str, res, &b32[0], &p64[32]);
@@ -109,7 +110,7 @@ struct AVX2Traits {
         return vec128toInt(res);
     }
 
-    static int polyHashUnalignedUnrollUpTo64(int n, uint16_t const* str) {
+    __AVX2__ static int polyHashUnalignedUnrollUpTo64(int n, uint16_t const* str) {
         Vec128Type res = initVec128();
 
         polyHashUnroll8<AVX2Traits>(n, str, res, &b64[0], &p64[0]);
@@ -128,8 +129,8 @@ struct AVX2Traits {
 const bool x64 = false;
 #endif
 bool initialized = false;
-bool sseSupported;
-bool avx2Supported;
+bool sseSupported = false;
+bool avx2Supported = false;
 
 }
 
@@ -161,6 +162,4 @@ int polyHash_x86(int length, uint16_t const* str) {
     return res;
 }
 
-#pragma clang attribute pop
-
 #endif
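
For context, polyHash_x86 (collapsed above) selects an implementation at runtime from the sseSupported/avx2Supported flags, so the per-function target attributes matter: only the branch the CPU actually supports is ever executed. A hedged sketch of that dispatch pattern follows; polyHashDispatch, the helper names, and the use of __builtin_cpu_supports are assumptions for illustration, not the file's actual detection code:

#include <cstdint>

// Hypothetical stand-ins for the traits' static members above.
int polyHashScalar(int n, uint16_t const* str); // portable fallback
int polyHashSse(int n, uint16_t const* str);    // e.g. SSETraits::polyHashUnalignedUnrollUpTo16
int polyHashAvx2(int n, uint16_t const* str);   // e.g. AVX2Traits::polyHashUnalignedUnrollUpTo32

int polyHashDispatch(int n, uint16_t const* str) {
    // CPUID-backed feature checks, cached after the first call.
    static const bool sse41 = __builtin_cpu_supports("sse4.1");
    static const bool avx2  = __builtin_cpu_supports("avx2");
    if (avx2)  return polyHashAvx2(n, str);
    if (sse41) return polyHashSse(n, str);
    return polyHashScalar(n, str);
}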
