Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 0 additions & 34 deletions src/avx2-32bit-qsort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,6 @@ struct avx2_vector<int32_t> {
{
return _mm256_xor_si256(x, y);
}
// Returns the bitwise complement of the mask `x`.
//
// The complement is formed by XOR-ing with an all-ones vector instead of
// applying unary `~`: `~` on a vector register type is a GCC/Clang vector
// extension and is not accepted by every compiler.
// NOTE(review): assumes opmask_t is __m256i, as the neighbouring comparison
// helpers suggest -- confirm against the struct's type aliases.
static opmask_t knot_opmask(opmask_t x)
{
    return _mm256_xor_si256(x, _mm256_set1_epi32(-1));
}
// Lane-wise signed 32-bit "x <= y" comparison.
//
// Computed as the complement of "x > y": every 32-bit lane of the result is
// all ones where x <= y and all zeros otherwise.
//
// The complement is formed by XOR-ing with an all-ones vector instead of
// applying unary `~`, which is a GCC/Clang vector extension and is not
// accepted by every compiler.
// NOTE(review): assumes opmask_t is __m256i -- confirm against the struct's
// type aliases.
static opmask_t le(reg_t x, reg_t y)
{
    const __m256i greater = _mm256_cmpgt_epi32(x, y);
    return _mm256_xor_si256(greater, _mm256_set1_epi32(-1));
}
static opmask_t ge(reg_t x, reg_t y)
{
opmask_t equal = eq(x, y);
Expand Down Expand Up @@ -178,11 +170,6 @@ struct avx2_vector<int32_t> {
const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);
return permutexvar(rev_index, ymm);
}
// Returns the 32-bit element of `v` selected by `index`.
// `index` is a template argument because the underlying intrinsic needs its
// lane selector to be a compile-time constant.
template <int index>
static type_t extract(reg_t v)
{
    const type_t lane = _mm256_extract_epi32(v, index);
    return lane;
}
static type_t reducemax(reg_t v)
{
return avx2_emu_reduce_max32<type_t>(v);
Expand Down Expand Up @@ -274,10 +261,6 @@ struct avx2_vector<uint32_t> {
{
return _mm256_i32gather_epi32((int const *)base, index, scale);
}
// Returns the bitwise complement of the mask `x`.
//
// The complement is formed by XOR-ing with an all-ones vector instead of
// applying unary `~`: `~` on a vector register type is a GCC/Clang vector
// extension and is not accepted by every compiler.
// NOTE(review): assumes opmask_t is __m256i -- confirm against the struct's
// type aliases.
static opmask_t knot_opmask(opmask_t x)
{
    return _mm256_xor_si256(x, _mm256_set1_epi32(-1));
}
static opmask_t ge(reg_t x, reg_t y)
{
reg_t maxi = max(x, y);
Expand Down Expand Up @@ -331,11 +314,6 @@ struct avx2_vector<uint32_t> {
const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);
return permutexvar(rev_index, ymm);
}
// Returns the 32-bit element of `v` selected by `index`, converted to type_t.
// `index` is a template argument because the underlying intrinsic needs its
// lane selector to be a compile-time constant.
template <int index>
static type_t extract(reg_t v)
{
    const type_t lane = _mm256_extract_epi32(v, index);
    return lane;
}
static type_t reducemax(reg_t v)
{
return avx2_emu_reduce_max32<type_t>(v);
Expand Down Expand Up @@ -417,10 +395,6 @@ struct avx2_vector<float> {
{
return _mm256_maskload_ps((const float *)mem, mask);
}
// Returns the bitwise complement of the mask `x`.
//
// The complement is formed by XOR-ing with an all-ones vector instead of
// applying unary `~`: `~` on a vector register type is a GCC/Clang vector
// extension and is not accepted by every compiler.
// NOTE(review): assumes opmask_t is __m256i (the adjacent `ge` returns an
// integer-cast comparison result) -- confirm against the struct's type
// aliases.
static opmask_t knot_opmask(opmask_t x)
{
    return _mm256_xor_si256(x, _mm256_set1_epi32(-1));
}
static opmask_t ge(reg_t x, reg_t y)
{
return _mm256_castps_si256(_mm256_cmp_ps(x, y, _CMP_GE_OQ));
Expand Down Expand Up @@ -503,14 +477,6 @@ struct avx2_vector<float> {
const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);
return permutexvar(rev_index, ymm);
}
// Returns the float stored in the 32-bit lane of `v` selected by `index`.
// The lane is read through the integer view of the register and its bits are
// then copied into a float unchanged, so no numeric conversion takes place.
template <int index>
static type_t extract(reg_t v)
{
    const __m256i as_int = _mm256_castps_si256(v);
    int32_t bits = _mm256_extract_epi32(as_int, index);
    float lane;
    // memcpy reinterprets the bit pattern without violating aliasing rules.
    std::memcpy(&lane, &bits, sizeof(lane));
    return lane;
}
static type_t reducemax(reg_t v)
{
return avx2_emu_reduce_max32<type_t>(v);
Expand Down
18 changes: 0 additions & 18 deletions src/avx2-64bit-qsort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,6 @@ struct avx2_vector<int64_t> {
const int32_t rev_index = SHUFFLE_MASK(0, 1, 2, 3);
return permutexvar<rev_index>(ymm);
}
// Returns the 64-bit element of `v` selected by `index`.
// `index` is a template argument because the underlying intrinsic needs its
// lane selector to be a compile-time constant.
template <int index>
static type_t extract(reg_t v)
{
    const type_t lane = _mm256_extract_epi64(v, index);
    return lane;
}
static type_t reducemax(reg_t v)
{
return avx2_emu_reduce_max64<type_t>(v);
Expand Down Expand Up @@ -335,11 +330,6 @@ struct avx2_vector<uint64_t> {
const int32_t rev_index = SHUFFLE_MASK(0, 1, 2, 3);
return permutexvar<rev_index>(ymm);
}
// Returns the 64-bit element of `v` selected by `index`, converted to type_t.
// `index` is a template argument because the underlying intrinsic needs its
// lane selector to be a compile-time constant.
template <int index>
static type_t extract(reg_t v)
{
    const type_t lane = _mm256_extract_epi64(v, index);
    return lane;
}
static type_t reducemax(reg_t v)
{
return avx2_emu_reduce_max64<type_t>(v);
Expand Down Expand Up @@ -504,14 +494,6 @@ struct avx2_vector<double> {
const int32_t rev_index = SHUFFLE_MASK(0, 1, 2, 3);
return permutexvar<rev_index>(ymm);
}
// Returns the double stored in the 64-bit lane of `v` selected by `index`.
// The lane is read through the integer view of the register and its bits are
// then copied into a double unchanged, so no numeric conversion takes place.
template <int index>
static type_t extract(reg_t v)
{
    const __m256i as_int = _mm256_castpd_si256(v);
    int64_t bits = _mm256_extract_epi64(as_int, index);
    double lane;
    // memcpy reinterprets the bit pattern without violating aliasing rules.
    std::memcpy(&lane, &bits, sizeof(lane));
    return lane;
}
static type_t reducemax(reg_t v)
{
return avx2_emu_reduce_max64<type_t>(v);
Expand Down
30 changes: 16 additions & 14 deletions src/avx2-emu-funcs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ T avx2_emu_reduce_max32(typename avx2_vector<T>::reg_t x)
x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
reg_t inter2 = vtype::max(
inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
T can1 = vtype::template extract<0>(inter2);
T can2 = vtype::template extract<4>(inter2);
return std::max(can1, can2);
T arr[vtype::numlanes];
vtype::storeu(arr, inter2);
return std::max(arr[0], arr[7]);
}

template <typename T>
Expand All @@ -149,9 +149,9 @@ T avx2_emu_reduce_min32(typename avx2_vector<T>::reg_t x)
x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
reg_t inter2 = vtype::min(
inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
T can1 = vtype::template extract<0>(inter2);
T can2 = vtype::template extract<4>(inter2);
return std::min(can1, can2);
T arr[vtype::numlanes];
vtype::storeu(arr, inter2);
return std::min(arr[0], arr[7]);
}

template <typename T>
Expand All @@ -160,9 +160,9 @@ T avx2_emu_reduce_max64(typename avx2_vector<T>::reg_t x)
using vtype = avx2_vector<T>;
typename vtype::reg_t inter1 = vtype::max(
x, vtype::template permutexvar<SHUFFLE_MASK(2, 3, 0, 1)>(x));
T can1 = vtype::template extract<0>(inter1);
T can2 = vtype::template extract<2>(inter1);
return std::max<T>(can1, can2);
T arr[vtype::numlanes];
vtype::storeu(arr, inter1);
return std::max(arr[0], arr[3]);
}

template <typename T>
Expand All @@ -171,9 +171,9 @@ T avx2_emu_reduce_min64(typename avx2_vector<T>::reg_t x)
using vtype = avx2_vector<T>;
typename vtype::reg_t inter1 = vtype::min(
x, vtype::template permutexvar<SHUFFLE_MASK(2, 3, 0, 1)>(x));
T can1 = vtype::template extract<0>(inter1);
T can2 = vtype::template extract<2>(inter1);
return std::min<T>(can1, can2);
T arr[vtype::numlanes];
vtype::storeu(arr, inter1);
return std::min(arr[0], arr[3]);
}

template <typename T>
Expand Down Expand Up @@ -224,6 +224,7 @@ int avx2_double_compressstore32(void *left_addr,
typename avx2_vector<T>::reg_t reg)
{
using vtype = avx2_vector<T>;
const __m256i oxff = _mm256_set1_epi32(0xFFFFFFFF);

T *leftStore = (T *)left_addr;
T *rightStore = (T *)right_addr;
Expand All @@ -237,7 +238,7 @@ int avx2_double_compressstore32(void *left_addr,
typename vtype::reg_t temp = vtype::permutevar(reg, perm);

vtype::mask_storeu(leftStore, left, temp);
vtype::mask_storeu(rightStore, ~left, temp);
vtype::mask_storeu(rightStore, _mm256_xor_si256(oxff, left), temp);

return _mm_popcnt_u32(shortMask);
}
Expand All @@ -249,6 +250,7 @@ int32_t avx2_double_compressstore64(void *left_addr,
typename avx2_vector<T>::reg_t reg)
{
using vtype = avx2_vector<T>;
const __m256i oxff = _mm256_set1_epi32(0xFFFFFFFF);

T *leftStore = (T *)left_addr;
T *rightStore = (T *)right_addr;
Expand All @@ -263,7 +265,7 @@ int32_t avx2_double_compressstore64(void *left_addr,
_mm256_permutevar8x32_epi32(vtype::cast_to(reg), perm));

vtype::mask_storeu(leftStore, left, temp);
vtype::mask_storeu(rightStore, ~left, temp);
vtype::mask_storeu(rightStore, _mm256_xor_si256(oxff, left), temp);

return _mm_popcnt_u32(shortMask);
}
Expand Down