|
5 | 5 | #if defined(ENABLE_SSE41) && defined(ENABLE_X86_AESNI) |
6 | 6 | #include <crypto/x11/util/util.hpp> |
7 | 7 |
|
8 | | -#include <cstdint> |
| 8 | +#include <cstddef> |
9 | 9 |
|
10 | 10 | #include <immintrin.h> |
11 | 11 | #include <wmmintrin.h> |
12 | 12 |
|
13 | 13 | namespace sapphire { |
14 | | -namespace x86_aesni_echo { |
15 | | -void FullStateRound(uint64_t W[16][2], uint32_t& k0, uint32_t& k1, uint32_t& k2, uint32_t& k3) |
| 14 | +namespace { |
| 15 | +void ALWAYS_INLINE StateRound(uint64_t W[16][2], size_t idx, __m128i& key, uint32_t& k0, uint32_t& k1, uint32_t& k2, uint32_t& k3) |
16 | 16 | { |
17 | | - __m128i key = util::pack_le(k0, k1, k2, k3); |
18 | | - for (int n = 0; n < 16; n++) { |
19 | | - __m128i block = _mm_load_si128((const __m128i*)&W[n][0]); |
20 | | - block = util::aes_round(block, key); |
21 | | - block = util::aes_round(block, _mm_setzero_si128()); |
22 | | - _mm_store_si128((__m128i*)&W[n][0], block); |
| 17 | + __m128i block = _mm_load_si128((const __m128i*)&W[idx][0]); |
| 18 | + block = util::aes_round(block, key); |
| 19 | + block = util::aes_round(block, _mm_setzero_si128()); |
| 20 | + _mm_store_si128((__m128i*)&W[idx][0], block); |
23 | 21 |
|
24 | | - util::unpack_le(key, k0, k1, k2, k3); |
25 | | - if ((k0 = (k0 + 1)) == 0) { |
26 | | - if ((k1 = (k1 + 1)) == 0) { |
27 | | - if ((k2 = (k2 + 1)) == 0) { |
28 | | - k3 = (k3 + 1); |
29 | | - } |
| 22 | + util::unpack_le(key, k0, k1, k2, k3); |
| 23 | + if ((k0 = (k0 + 1)) == 0) { |
| 24 | + if ((k1 = (k1 + 1)) == 0) { |
| 25 | + if ((k2 = (k2 + 1)) == 0) { |
| 26 | + k3 = (k3 + 1); |
30 | 27 | } |
31 | 28 | } |
32 | | - key = util::pack_le(k0, k1, k2, k3); |
33 | 29 | } |
34 | | - util::unpack_le(key, k0, k1, k2, k3); |
| 30 | +} |
| 31 | +} // anonymous namespace |
| 32 | + |
| 33 | +namespace x86_aesni_echo { |
| 34 | +void FullStateRound(uint64_t W[16][2], uint32_t& k0, uint32_t& k1, uint32_t& k2, uint32_t& k3) |
| 35 | +{ |
| 36 | + __m128i key = util::pack_le(k0, k1, k2, k3); |
| 37 | + StateRound(W, 0, key, k0, k1, k2, k3); |
| 38 | + key = util::pack_le(k0, k1, k2, k3); |
| 39 | + StateRound(W, 1, key, k0, k1, k2, k3); |
| 40 | + key = util::pack_le(k0, k1, k2, k3); |
| 41 | + StateRound(W, 2, key, k0, k1, k2, k3); |
| 42 | + key = util::pack_le(k0, k1, k2, k3); |
| 43 | + StateRound(W, 3, key, k0, k1, k2, k3); |
| 44 | + key = util::pack_le(k0, k1, k2, k3); |
| 45 | + StateRound(W, 4, key, k0, k1, k2, k3); |
| 46 | + key = util::pack_le(k0, k1, k2, k3); |
| 47 | + StateRound(W, 5, key, k0, k1, k2, k3); |
| 48 | + key = util::pack_le(k0, k1, k2, k3); |
| 49 | + StateRound(W, 6, key, k0, k1, k2, k3); |
| 50 | + key = util::pack_le(k0, k1, k2, k3); |
| 51 | + StateRound(W, 7, key, k0, k1, k2, k3); |
| 52 | + key = util::pack_le(k0, k1, k2, k3); |
| 53 | + StateRound(W, 8, key, k0, k1, k2, k3); |
| 54 | + key = util::pack_le(k0, k1, k2, k3); |
| 55 | + StateRound(W, 9, key, k0, k1, k2, k3); |
| 56 | + key = util::pack_le(k0, k1, k2, k3); |
| 57 | + StateRound(W, 10, key, k0, k1, k2, k3); |
| 58 | + key = util::pack_le(k0, k1, k2, k3); |
| 59 | + StateRound(W, 11, key, k0, k1, k2, k3); |
| 60 | + key = util::pack_le(k0, k1, k2, k3); |
| 61 | + StateRound(W, 12, key, k0, k1, k2, k3); |
| 62 | + key = util::pack_le(k0, k1, k2, k3); |
| 63 | + StateRound(W, 13, key, k0, k1, k2, k3); |
| 64 | + key = util::pack_le(k0, k1, k2, k3); |
| 65 | + StateRound(W, 14, key, k0, k1, k2, k3); |
| 66 | + key = util::pack_le(k0, k1, k2, k3); |
| 67 | + StateRound(W, 15, key, k0, k1, k2, k3); |
35 | 68 | } |
36 | 69 | } // namespace x86_aesni_echo |
37 | 70 | } // namespace sapphire |
|
0 commit comments