Skip to content

Commit e1bbec4

Browse files
committed
crypto: avoid extra load/store in Echo512's ShiftAndMix()
1 parent 250dcce commit e1bbec4

File tree

1 file changed

+18
-18
lines changed

1 file changed

+18
-18
lines changed

src/crypto/x11/ssse3/echo.cpp

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,12 @@ __m128i ALWAYS_INLINE gf8_mul2(const __m128i& x)
2525
return util::Xor(lhs, rhs);
2626
}
2727

28-
void ALWAYS_INLINE MixColumn(uint64_t W[16][2], int ia, int ib, int ic, int id)
28+
void ALWAYS_INLINE MixColumn(__m128i& Wa, __m128i& Wb, __m128i& Wc, __m128i& Wd)
2929
{
30-
const __m128i a = _mm_load_si128((const __m128i*)&W[ia][0]);
31-
const __m128i b = _mm_load_si128((const __m128i*)&W[ib][0]);
32-
const __m128i c = _mm_load_si128((const __m128i*)&W[ic][0]);
33-
const __m128i d = _mm_load_si128((const __m128i*)&W[id][0]);
30+
const __m128i a = Wa;
31+
const __m128i b = Wb;
32+
const __m128i c = Wc;
33+
const __m128i d = Wd;
3434

3535
const __m128i ab = util::Xor(a, b);
3636
const __m128i bc = util::Xor(b, c);
@@ -40,14 +40,14 @@ void ALWAYS_INLINE MixColumn(uint64_t W[16][2], int ia, int ib, int ic, int id)
4040
const __m128i bcx = gf8_mul2(bc);
4141
const __m128i cdx = gf8_mul2(cd);
4242

43-
// W[ia] = abx ^ bc ^ d
44-
_mm_store_si128((__m128i*)&W[ia][0], util::Xor(util::Xor(abx, bc), d));
45-
// W[ib] = bcx ^ a ^ cd
46-
_mm_store_si128((__m128i*)&W[ib][0], util::Xor(util::Xor(bcx, a), cd));
47-
// W[ic] = cdx ^ ab ^ d
48-
_mm_store_si128((__m128i*)&W[ic][0], util::Xor(util::Xor(cdx, ab), d));
49-
// W[id] = abx ^ bcx ^ cdx ^ ab ^ c
50-
_mm_store_si128((__m128i*)&W[id][0], util::Xor(util::Xor(util::Xor(util::Xor(abx, bcx), cdx), ab), c));
43+
// Wa = abx ^ bc ^ d
44+
Wa = util::Xor(util::Xor(abx, bc), d);
45+
// Wb = bcx ^ a ^ cd
46+
Wb = util::Xor(util::Xor(bcx, a), cd);
47+
// Wc = cdx ^ ab ^ d
48+
Wc = util::Xor(util::Xor(cdx, ab), d);
49+
// Wd = abx ^ bcx ^ cdx ^ ab ^ c
50+
Wd = util::Xor(util::Xor(util::Xor(util::Xor(abx, bcx), cdx), ab), c);
5151
}
5252

5353
void ALWAYS_INLINE ShiftRow1(__m128i& Wa, __m128i& Wb, __m128i& Wc, __m128i& Wd)
@@ -104,6 +104,11 @@ void ShiftAndMix(uint64_t W[16][2])
104104
ShiftRow2(w[2], w[6], w[10], w[14]);
105105
ShiftRow3(w[3], w[7], w[11], w[15]);
106106

107+
MixColumn(w[0], w[1], w[2], w[3]);
108+
MixColumn(w[4], w[5], w[6], w[7]);
109+
MixColumn(w[8], w[9], w[10], w[11]);
110+
MixColumn(w[12], w[13], w[14], w[15]);
111+
107112
_mm_store_si128((__m128i*)&W[0][0], w[0]);
108113
_mm_store_si128((__m128i*)&W[1][0], w[1]);
109114
_mm_store_si128((__m128i*)&W[2][0], w[2]);
@@ -120,11 +125,6 @@ void ShiftAndMix(uint64_t W[16][2])
120125
_mm_store_si128((__m128i*)&W[13][0], w[13]);
121126
_mm_store_si128((__m128i*)&W[14][0], w[14]);
122127
_mm_store_si128((__m128i*)&W[15][0], w[15]);
123-
124-
MixColumn(W, 0, 1, 2, 3);
125-
MixColumn(W, 4, 5, 6, 7);
126-
MixColumn(W, 8, 9, 10, 11);
127-
MixColumn(W, 12, 13, 14, 15);
128128
}
129129
} // namespace ssse3_echo
130130
} // namespace sapphire

0 commit comments

Comments
 (0)