Skip to content

Commit

Permalink
do small changes to salsa sse2
Browse files Browse the repository at this point in the history
  • Loading branch information
oxarbitrage committed Sep 3, 2023
1 parent 814bee9 commit 639b85a
Showing 1 changed file with 29 additions and 32 deletions.
61 changes: 29 additions & 32 deletions salsa20/src/backends/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ where
};

f.call(&mut backend);

state[8] = _mm_cvtsi128_si32(backend.v[2]) as u32;
}

Expand Down Expand Up @@ -67,49 +68,28 @@ unsafe fn rounds<R: Unsigned>(v: &[__m128i; 4]) -> [__m128i; 4] {
let mut res = *v;

for _ in 0..R::USIZE {
double_quarter_round(&mut res);
double_round(&mut res);
}

for i in 0..4 {
res[i] = _mm_add_epi32(res[i], v[i]);
}

transpose(&mut res);

res[1] = _mm_shuffle_epi32(res[1], 0b_10_01_00_11);
res[2] = _mm_shuffle_epi32(res[2], 0b_01_00_11_10);
res[3] = _mm_shuffle_epi32(res[3], 0b_00_11_10_01);

transpose(&mut res);

res
}

/// The Salsa20 doubleround function for SSE2.
///
/// Thin wrapper: forwards the four-vector state `v` to `add_xor_rot`, which
/// updates it in place.
///
/// See <https://users.rust-lang.org/t/can-the-compiler-infer-sse-instructions/59976>.
///
/// # Safety
///
/// This function is compiled with the `sse2` target feature enabled, so the
/// caller must ensure the running CPU supports SSE2.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn double_quarter_round(v: &mut [__m128i; 4]) {
add_xor_rot(v);
}

#[inline]
#[target_feature(enable = "sse2")]
unsafe fn transpose(rows: &mut [__m128i; 4]) {
    // Unpack-based 4x4 transpose of 32-bit lanes, following
    // https://randombit.net/bitbashing/posts/integer_matrix_transpose_in_sse2.html
    let [r0, r1, r2, r3] = *rows;

    // Step 1: interleave the 32-bit lanes of each pair of neighbouring rows.
    let lo01 = _mm_unpacklo_epi32(r0, r1);
    let lo23 = _mm_unpacklo_epi32(r2, r3);
    let hi01 = _mm_unpackhi_epi32(r0, r1);
    let hi23 = _mm_unpackhi_epi32(r2, r3);

    // Step 2: combine the 64-bit halves to form the transposed rows and
    // store them back over the input.
    *rows = [
        _mm_unpacklo_epi64(lo01, lo23),
        _mm_unpackhi_epi64(lo01, lo23),
        _mm_unpacklo_epi64(hi01, hi23),
        _mm_unpackhi_epi64(hi01, hi23),
    ];
}

#[inline]
#[target_feature(enable = "sse2")]
unsafe fn add_xor_rot([a, b, c, d]: &mut [__m128i; 4]) {
unsafe fn double_round([a, b, c, d]: &mut [__m128i; 4]) {
let mut t_sum: __m128i;
let mut t_rotl: __m128i;

Expand All @@ -131,9 +111,9 @@ unsafe fn add_xor_rot([a, b, c, d]: &mut [__m128i; 4]) {
*a = _mm_xor_si128(*a, t_rotl);

// Rearrange data.
*b = _mm_shuffle_epi32(*b, 0x93); // 10_01_00_11
*c = _mm_shuffle_epi32(*c, 0x4E); // 01_00_11_10
*d = _mm_shuffle_epi32(*d, 0x39); // 00_11_10_01
*b = _mm_shuffle_epi32(*b, 0b_10_01_00_11);
*c = _mm_shuffle_epi32(*c, 0b_01_00_11_10);
*d = _mm_shuffle_epi32(*d, 0b_00_11_10_01);

// Operate on "rows".
t_sum = _mm_add_epi32(*a, *b);
Expand All @@ -153,7 +133,24 @@ unsafe fn add_xor_rot([a, b, c, d]: &mut [__m128i; 4]) {
*a = _mm_xor_si128(*a, t_rotl);

// Rearrange data.
*b = _mm_shuffle_epi32(*b, 0x39); // 00_11_10_01
*c = _mm_shuffle_epi32(*c, 0x4E); // 01_00_11_10
*d = _mm_shuffle_epi32(*d, 0x93); // 10_01_00_11
*b = _mm_shuffle_epi32(*b, 0b_00_11_10_01);
*c = _mm_shuffle_epi32(*c, 0b_01_00_11_10);
*d = _mm_shuffle_epi32(*d, 0b_10_01_00_11);
}

/// Transposes, in place, the 4x4 matrix of 32-bit integers stored as four
/// 128-bit vectors (one vector per row).
///
/// The rows are first interleaved pairwise at 32-bit granularity, then the
/// resulting 64-bit halves are recombined into the transposed rows.
///
/// Reference:
/// <https://randombit.net/bitbashing/posts/integer_matrix_transpose_in_sse2.html>
///
/// # Safety
///
/// This function is compiled with the `sse2` target feature enabled, so the
/// caller must ensure the running CPU supports SSE2.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn transpose(m: &mut [__m128i; 4]) {
    // All four inputs are read before any row is overwritten.
    let (ab_lo, ab_hi) = (
        _mm_unpacklo_epi32(m[0], m[1]),
        _mm_unpackhi_epi32(m[0], m[1]),
    );
    let (cd_lo, cd_hi) = (
        _mm_unpacklo_epi32(m[2], m[3]),
        _mm_unpackhi_epi32(m[2], m[3]),
    );

    m[0] = _mm_unpacklo_epi64(ab_lo, cd_lo);
    m[1] = _mm_unpackhi_epi64(ab_lo, cd_lo);
    m[2] = _mm_unpacklo_epi64(ab_hi, cd_hi);
    m[3] = _mm_unpackhi_epi64(ab_hi, cd_hi);
}

0 comments on commit 639b85a

Please sign in to comment.