1+ #include < util/generic/ptr.h>
2+ #include < util/system/cpu_id.h>
3+ #include < util/system/types.h>
4+ #include < util/stream/output.h>
5+ #include < util/generic/string.h>
6+ #include < vector>
7+ #include < immintrin.h>
8+ #include < avxintrin.h>
9+ #include < chrono>
10+ #include < ydb/library/yql/utils/simd/simd.h>
11+
12+
// Number of elements in every benchmarked column (6.4 million).
const size_t size = 6400000;
14+
15+ template <typename T>
16+ inline double GetSum (std::vector<std::vector<T>>& columns, std::vector<T>& result) {
17+ const size_t SIZE_OF_TYPE = 256 / (sizeof (T) * 8 );
18+ const size_t align_size = columns[0 ].size ();
19+
20+ std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now ();
21+
22+ for (size_t i = 0 ; i < align_size; i += SIZE_OF_TYPE) {
23+ NSimd::NAVX2::TSimd8<T> final_register (&columns[0 ][i]);
24+
25+ for (size_t j = 1 ; j < columns.size (); ++j) {
26+ final_register.Add64 (&columns[j][i]);
27+ }
28+
29+ final_register.Store (&result[i]);
30+ }
31+
32+ std::chrono::steady_clock::time_point finish = std::chrono::steady_clock::now ();
33+
34+ return std::chrono::duration_cast<std::chrono::microseconds>(finish - start).count ();
35+
36+ }
37+
/// Baseline (non-SIMD) element-wise summation of equally sized columns.
///
/// For every row index j, each columns[i][j] is added to result[j]. The sums
/// are accumulated on top of whatever `result` already holds, so the caller
/// is expected to pass a zero-initialised vector.
///
/// Preconditions: every column and `result` hold at least columns[0].size()
/// elements.
///
/// @param columns input columns; an empty set of columns is a no-op.
/// @param result  accumulator vector receiving the per-row sums.
/// @return elapsed time of the summation in microseconds.
double StandartAdding(std::vector<std::vector<ui64>>& columns, std::vector<ui64>& result) {
    const size_t columnCount = columns.size();
    const size_t rowCount = columnCount == 0 ? 0 : columns[0].size();

    const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

    for (size_t j = 0; j < rowCount; ++j) {
        // The column index is bounded by the number of columns, not by the
        // length of a column; otherwise `columns` is indexed out of range.
        for (size_t i = 0; i < columnCount; ++i) {
            result[j] += columns[i][j];
        }
    }

    const std::chrono::steady_clock::time_point finish = std::chrono::steady_clock::now();

    return static_cast<double>(
        std::chrono::duration_cast<std::chrono::microseconds>(finish - start).count());
}
52+
53+ int main () {
54+ std::vector<std::vector<ui64>> vec1 (10 , std::vector<ui64>(size, 1e12 + 3 ));
55+
56+ std::vector<ui64> result1 (size, 0 );
57+ std::vector<ui64> result2 (size, 0 );
58+
59+ double ans1 = GetSum (vec1, result1);
60+ double ans2 = StandartAdding (vec1, result2);
61+
62+ for (size_t i = 0 ; i < result2.size (); ++i) {
63+ if (result2[i] != result1[i]) {
64+ Cerr << " something went wrong..." ;
65+ return 0 ;
66+ }
67+ }
68+
69+ Cerr << " The results are the same. Let's compare times:\n " ;
70+ Cerr << " Time, using AVX2: " << ans1 << " ms\n " ;
71+ Cerr << " Time, using standart adding: " << ans2 << " ms" ;
72+ }