Skip to content

Commit 71d6ef9

Browse files
committed
crypto: implement SSSE3 backend for Echo512's ShiftRows()
1 parent da38871 commit 71d6ef9

File tree

4 files changed

+106
-24
lines changed

4 files changed

+106
-24
lines changed

src/crypto/x11/dispatch.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ namespace sapphire {
1919
#if defined(ENABLE_SSSE3)
2020
namespace ssse3_echo {
2121
void MixColumns(uint64_t W[16][2]);
22+
void ShiftRows(uint64_t W[16][2]);
2223
} // namespace ssse3_echo
2324
#endif // ENABLE_SSSE3
2425

@@ -46,20 +47,23 @@ void RoundKeyless(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
4647
namespace soft_echo {
4748
void FullStateRound(uint64_t W[16][2], uint32_t& k0, uint32_t& k1, uint32_t& k2, uint32_t& k3);
4849
void MixColumns(uint64_t W[16][2]);
50+
void ShiftRows(uint64_t W[16][2]);
4951
} // namespace soft_echo
5052
} // namespace sapphire
5153

5254
extern sapphire::dispatch::AESRoundFn aes_round;
5355
extern sapphire::dispatch::AESRoundFnNk aes_round_nk;
5456
extern sapphire::dispatch::EchoMixCols echo_mix_columns;
5557
extern sapphire::dispatch::EchoRoundFn echo_round;
58+
extern sapphire::dispatch::EchoShiftRows echo_shift_rows;
5659

5760
void SapphireAutoDetect()
5861
{
5962
aes_round = sapphire::soft_aes::Round;
6063
aes_round_nk = sapphire::soft_aes::RoundKeyless;
6164
echo_round = sapphire::soft_echo::FullStateRound;
6265
echo_mix_columns = sapphire::soft_echo::MixColumns;
66+
echo_shift_rows = sapphire::soft_echo::ShiftRows;
6367

6468
#if !defined(DISABLE_OPTIMIZED_SHA256)
6569
#if defined(HAVE_GETCPUID)
@@ -78,6 +82,7 @@ void SapphireAutoDetect()
7882
const bool use_ssse3 = ((ecx >> 9) & 1);
7983
if (use_ssse3) {
8084
echo_mix_columns = sapphire::ssse3_echo::MixColumns;
85+
echo_shift_rows = sapphire::ssse3_echo::ShiftRows;
8186
}
8287
#endif // ENABLE_SSSE3
8388
#endif // HAVE_GETCPUID

src/crypto/x11/dispatch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ typedef void (*AESRoundFnNk)(uint32_t, uint32_t, uint32_t, uint32_t,
1717

1818
typedef void (*EchoMixCols)(uint64_t[16][2]);
1919
typedef void (*EchoRoundFn)(uint64_t[16][2], uint32_t&, uint32_t&, uint32_t&, uint32_t&);
20+
typedef void (*EchoShiftRows)(uint64_t[16][2]);
2021
} // namespace dispatch
2122
} // namespace sapphire
2223

src/crypto/x11/echo.cpp

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -108,26 +108,9 @@ void MixColumns(uint64_t W[16][2])
108108
MixColumn(W, 8, 9, 10, 11);
109109
MixColumn(W, 12, 13, 14, 15);
110110
}
111-
} // namespace soft_echo
112-
} // namespace sapphire
113-
114-
sapphire::dispatch::EchoMixCols echo_mix_columns = sapphire::soft_echo::MixColumns;
115-
sapphire::dispatch::EchoRoundFn echo_round = sapphire::soft_echo::FullStateRound;
116-
117-
#define DECL_STATE_BIG \
118-
alignas(16) sph_u64 W[16][2];
119-
120-
#define INPUT_BLOCK_BIG(sc) do { \
121-
unsigned u; \
122-
memcpy(W, sc->u.Vb, 16 * sizeof(sph_u64)); \
123-
for (u = 0; u < 8; u ++) { \
124-
W[u + 8][0] = sph_dec64le_aligned( \
125-
sc->buf + 16 * u); \
126-
W[u + 8][1] = sph_dec64le_aligned( \
127-
sc->buf + 16 * u + 8); \
128-
} \
129-
} while (0)
130111

112+
void ShiftRows(uint64_t W[16][2])
113+
{
131114
#define SHIFT_ROW1(a, b, c, d) do { \
132115
sph_u64 tmp; \
133116
tmp = W[a][0]; \
@@ -160,15 +143,38 @@ sapphire::dispatch::EchoRoundFn echo_round = sapphire::soft_echo::FullStateRound
160143

161144
#define SHIFT_ROW3(a, b, c, d) SHIFT_ROW1(d, c, b, a)
162145

163-
#define BIG_SHIFT_ROWS do { \
164-
SHIFT_ROW1(1, 5, 9, 13); \
165-
SHIFT_ROW2(2, 6, 10, 14); \
166-
SHIFT_ROW3(3, 7, 11, 15); \
146+
SHIFT_ROW1(1, 5, 9, 13);
147+
SHIFT_ROW2(2, 6, 10, 14);
148+
SHIFT_ROW3(3, 7, 11, 15);
149+
150+
#undef SHIFT_ROW1
151+
#undef SHIFT_ROW2
152+
#undef SHIFT_ROW3
153+
}
154+
} // namespace soft_echo
155+
} // namespace sapphire
156+
157+
sapphire::dispatch::EchoMixCols echo_mix_columns = sapphire::soft_echo::MixColumns;
158+
sapphire::dispatch::EchoRoundFn echo_round = sapphire::soft_echo::FullStateRound;
159+
sapphire::dispatch::EchoShiftRows echo_shift_rows = sapphire::soft_echo::ShiftRows;
160+
161+
#define DECL_STATE_BIG \
162+
alignas(16) sph_u64 W[16][2];
163+
164+
#define INPUT_BLOCK_BIG(sc) do { \
165+
unsigned u; \
166+
memcpy(W, sc->u.Vb, 16 * sizeof(sph_u64)); \
167+
for (u = 0; u < 8; u ++) { \
168+
W[u + 8][0] = sph_dec64le_aligned( \
169+
sc->buf + 16 * u); \
170+
W[u + 8][1] = sph_dec64le_aligned( \
171+
sc->buf + 16 * u + 8); \
172+
} \
167173
} while (0)
168174

169175
#define BIG_ROUND do { \
170176
echo_round(W, K0, K1, K2, K3); \
171-
BIG_SHIFT_ROWS; \
177+
echo_shift_rows(W); \
172178
echo_mix_columns(W); \
173179
} while (0)
174180

src/crypto/x11/ssse3/echo.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,34 @@ void ALWAYS_INLINE MixColumn(uint64_t W[16][2], int ia, int ib, int ic, int id)
4949
// W[id] = abx ^ bcx ^ cdx ^ ab ^ c
5050
_mm_store_si128((__m128i*)&W[id][0], util::Xor(util::Xor(util::Xor(util::Xor(abx, bcx), cdx), ab), c));
5151
}
52+
53+
// Rotates four 128-bit values by one position: each argument takes the value
// of the argument that follows it, and Wd receives the original value of Wa.
// All four arguments are updated in place.
void ALWAYS_INLINE ShiftRow1(__m128i& Wa, __m128i& Wb, __m128i& Wc, __m128i& Wd)
{
    // Wa is overwritten first, so keep its original value for the final slot.
    const __m128i original_a = Wa;

    Wa = Wb;
    Wb = Wc;
    Wc = Wd;
    Wd = original_a;
}
61+
62+
// Rotates four 128-bit values by two positions, i.e. exchanges Wa with Wc and
// Wb with Wd. All four arguments are updated in place.
void ALWAYS_INLINE ShiftRow2(__m128i& Wa, __m128i& Wb, __m128i& Wc, __m128i& Wd)
{
    // Snapshot the first pair before it is overwritten by the second pair.
    const __m128i original_a = Wa;
    const __m128i original_b = Wb;

    Wa = Wc;
    Wb = Wd;
    Wc = original_a;
    Wd = original_b;
}
71+
72+
// Rotates four 128-bit values by one position in the direction opposite to
// ShiftRow1: each argument takes the value of the argument that precedes it,
// and Wa receives the original value of Wd. All four are updated in place.
void ALWAYS_INLINE ShiftRow3(__m128i& Wa, __m128i& Wb, __m128i& Wc, __m128i& Wd)
{
    // Wd is overwritten first, so keep its original value for the final slot.
    const __m128i original_d = Wd;

    Wd = Wc;
    Wc = Wb;
    Wb = Wa;
    Wa = original_d;
}
5280
} // anonymous namespace
5381

5482
namespace ssse3_echo {
@@ -59,6 +87,48 @@ void MixColumns(uint64_t W[16][2])
5987
MixColumn(W, 8, 9, 10, 11);
6088
MixColumn(W, 12, 13, 14, 15);
6189
}
90+
91+
void ShiftRows(uint64_t W[16][2])
92+
{
93+
alignas(16) __m128i w[16];
94+
w[0] = _mm_load_si128((const __m128i*)&W[0][0]);
95+
w[1] = _mm_load_si128((const __m128i*)&W[1][0]);
96+
w[2] = _mm_load_si128((const __m128i*)&W[2][0]);
97+
w[3] = _mm_load_si128((const __m128i*)&W[3][0]);
98+
w[4] = _mm_load_si128((const __m128i*)&W[4][0]);
99+
w[5] = _mm_load_si128((const __m128i*)&W[5][0]);
100+
w[6] = _mm_load_si128((const __m128i*)&W[6][0]);
101+
w[7] = _mm_load_si128((const __m128i*)&W[7][0]);
102+
w[8] = _mm_load_si128((const __m128i*)&W[8][0]);
103+
w[9] = _mm_load_si128((const __m128i*)&W[9][0]);
104+
w[10] = _mm_load_si128((const __m128i*)&W[10][0]);
105+
w[11] = _mm_load_si128((const __m128i*)&W[11][0]);
106+
w[12] = _mm_load_si128((const __m128i*)&W[12][0]);
107+
w[13] = _mm_load_si128((const __m128i*)&W[13][0]);
108+
w[14] = _mm_load_si128((const __m128i*)&W[14][0]);
109+
w[15] = _mm_load_si128((const __m128i*)&W[15][0]);
110+
111+
ShiftRow1(w[1], w[5], w[9], w[13]);
112+
ShiftRow2(w[2], w[6], w[10], w[14]);
113+
ShiftRow3(w[3], w[7], w[11], w[15]);
114+
115+
_mm_store_si128((__m128i*)&W[0][0], w[0]);
116+
_mm_store_si128((__m128i*)&W[1][0], w[1]);
117+
_mm_store_si128((__m128i*)&W[2][0], w[2]);
118+
_mm_store_si128((__m128i*)&W[3][0], w[3]);
119+
_mm_store_si128((__m128i*)&W[4][0], w[4]);
120+
_mm_store_si128((__m128i*)&W[5][0], w[5]);
121+
_mm_store_si128((__m128i*)&W[6][0], w[6]);
122+
_mm_store_si128((__m128i*)&W[7][0], w[7]);
123+
_mm_store_si128((__m128i*)&W[8][0], w[8]);
124+
_mm_store_si128((__m128i*)&W[9][0], w[9]);
125+
_mm_store_si128((__m128i*)&W[10][0], w[10]);
126+
_mm_store_si128((__m128i*)&W[11][0], w[11]);
127+
_mm_store_si128((__m128i*)&W[12][0], w[12]);
128+
_mm_store_si128((__m128i*)&W[13][0], w[13]);
129+
_mm_store_si128((__m128i*)&W[14][0], w[14]);
130+
_mm_store_si128((__m128i*)&W[15][0], w[15]);
131+
}
62132
} // namespace ssse3_echo
63133
} // namespace sapphire
64134

0 commit comments

Comments
 (0)