Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions ydb/library/yql/utils/simd/exec/add_columns/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#include <util/generic/ptr.h>
#include <util/system/cpu_id.h>
#include <util/system/types.h>
#include <util/stream/output.h>
#include <util/generic/string.h>
#include <vector>
#include <immintrin.h>
#include <avxintrin.h>
#include <chrono>
#include <ydb/library/yql/utils/simd/simd.h>


// Element count used by main() for every input column and for both result vectors.
constexpr size_t size = 6'400'000;

/**
 * Sums the columns element-wise into `result` using 256-bit AVX2 registers
 * and returns the elapsed time.
 *
 * result[i] is overwritten with columns[0][i] + columns[1][i] + ...
 *
 * NOTE: the vector path uses TSimd8::Add64, i.e. a packed 64-bit add, so the
 * result is only meaningful for 64-bit element types.
 * NOTE(review): TSimd8<T>(const T*) and Store(T*) are assumed to read/write a
 * full 256-bit register without alignment requirements - confirm in simd_avx2.h.
 *
 * @param columns input columns; all are expected to have the length of columns[0].
 * @param result  output vector; must hold at least columns[0].size() elements.
 * @return elapsed time in microseconds (0 when `columns` is empty).
 */
template <typename T>
inline double GetSum(std::vector<std::vector<T>>& columns, std::vector<T>& result) {
    // Nothing to add; also avoids touching columns[0] on an empty input.
    if (columns.empty()) {
        return 0;
    }

    // Number of T elements that fit into one 256-bit register.
    const size_t SIZE_OF_TYPE = 256 / (sizeof(T) * 8);
    const size_t align_size = columns[0].size();
    // Largest prefix that can be processed in whole registers; the remainder
    // is handled by the scalar loop below so no load/store runs past the end.
    const size_t vectorized_size = align_size - align_size % SIZE_OF_TYPE;

    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

    for (size_t i = 0; i < vectorized_size; i += SIZE_OF_TYPE) {
        NSimd::NAVX2::TSimd8<T> final_register(&columns[0][i]);

        for (size_t j = 1; j < columns.size(); ++j) {
            final_register.Add64(&columns[j][i]);
        }

        final_register.Store(&result[i]);
    }

    // Scalar tail for lengths that are not a multiple of the register width.
    for (size_t i = vectorized_size; i < align_size; ++i) {
        T sum = columns[0][i];

        for (size_t j = 1; j < columns.size(); ++j) {
            sum += columns[j][i];
        }

        result[i] = sum;
    }

    std::chrono::steady_clock::time_point finish = std::chrono::steady_clock::now();

    return std::chrono::duration_cast<std::chrono::microseconds>(finish - start).count();
}

/**
 * Scalar baseline: adds every column element-wise into `result` and returns
 * the elapsed time.
 *
 * result[j] is incremented (not overwritten) by the sum of columns[i][j] over
 * all columns, so the caller is expected to pass a zero-initialised vector.
 *
 * @param columns input columns; all are expected to have the length of columns[0].
 * @param result  accumulator; must hold at least columns[0].size() elements.
 * @return elapsed time in microseconds (0 when `columns` is empty).
 */
double StandartAdding(std::vector<std::vector<ui64>>& columns, std::vector<ui64>& result) {
    // Nothing to add; also avoids touching columns[0] on an empty input.
    if (columns.empty()) {
        return 0;
    }

    const size_t column_count = columns.size();
    const size_t row_count = columns[0].size();

    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

    for (size_t j = 0; j < row_count; ++j) {
        // Iterate over the columns themselves: the bound is the number of
        // columns, not the length of the column currently being indexed.
        for (size_t i = 0; i < column_count; ++i) {
            result[j] += columns[i][j];
        }
    }

    std::chrono::steady_clock::time_point finish = std::chrono::steady_clock::now();

    return std::chrono::duration_cast<std::chrono::microseconds>(finish - start).count();
}

/**
 * Benchmark driver: sums 10 columns of `size` ui64 values with the AVX2
 * implementation (GetSum) and with the scalar baseline (StandartAdding),
 * verifies that both produce the same result and prints both timings.
 *
 * @return 0 when the results match, 1 when they differ.
 */
int main() {
    std::vector<std::vector<ui64>> vec1(10, std::vector<ui64>(size, 1e12 + 3));

    std::vector<ui64> result1(size, 0);
    std::vector<ui64> result2(size, 0);

    double ans1 = GetSum(vec1, result1);
    double ans2 = StandartAdding(vec1, result2);

    if (result1 != result2) {
        Cerr << "something went wrong...\n";
        // Non-zero exit code so scripts and CI can detect the mismatch.
        return 1;
    }

    // Both helpers report std::chrono microseconds, so label the values as such.
    Cerr << "The results are the same. Let's compare times:\n";
    Cerr << "Time, using AVX2: " << ans1 << " us\n";
    Cerr << "Time, using standart adding: " << ans2 << " us\n";

    return 0;
}
13 changes: 13 additions & 0 deletions ydb/library/yql/utils/simd/exec/add_columns/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Build description for the add_columns benchmark executable.
OWNER(g:yql)

# Declares a program target named add_columns.
PROGRAM(add_columns)

SRCS(main.cpp)

# NOTE(review): SIZE() looks like a test-target setting; confirm it has any effect on a PROGRAM.
SIZE(MEDIUM)

# main.cpp is built with AVX2 code generation enabled (it relies on 256-bit SIMD helpers).
CFLAGS(-mavx2)

# Library that provides ydb/library/yql/utils/simd/simd.h, included by main.cpp.
PEERDIR(ydb/library/yql/utils/simd)

END()
1 change: 1 addition & 0 deletions ydb/library/yql/utils/simd/exec/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@ RECURSE(
pack_tuple
tuples_to_bucket
stream_store
add_columns
)
4 changes: 4 additions & 0 deletions ydb/library/yql/utils/simd/simd_avx2.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ struct TSimd8 {
crc = _mm_crc32_u64(crc, *((ui64*) &this->Value + 3));
return crc;
}

inline void Add64(const TSimd8<T>& another) {
Value = _mm256_add_epi64(Value, another.Value);
}

inline int ToBitMask() const {
return _mm256_movemask_epi8(this->Value);
Expand Down