From 6a290c26b64fb41f4681afbe92a68f759865fb7e Mon Sep 17 00:00:00 2001 From: Martins Mozeiko Date: Fri, 9 Nov 2018 21:27:37 -0800 Subject: [PATCH 1/4] Forward declare meow_hash_state for C compiler --- meow_intrinsics.h | 1 + 1 file changed, 1 insertion(+) diff --git a/meow_intrinsics.h b/meow_intrinsics.h index 6b8998a..aa85321 100644 --- a/meow_intrinsics.h +++ b/meow_intrinsics.h @@ -245,6 +245,7 @@ typedef meow_u128 meow_hash; #endif +typedef struct meow_hash_state meow_hash_state; typedef meow_hash meow_hash_implementation(meow_u64 Seed, meow_u64 Len, void *Source); typedef void meow_absorb_implementation(struct meow_hash_state *State, meow_u64 Len, void *Source); From bc031b9adb52b2d951029ce21f5f94c218783419 Mon Sep 17 00:00:00 2001 From: Martins Mozeiko Date: Fri, 9 Nov 2018 21:28:52 -0800 Subject: [PATCH 2/4] SSSE3 now is mandatory --- .travis.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1bd365b..9622f7d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,7 +33,7 @@ matrix: # test if meow_hash.h compiles as C - touch test.c - |- - "${CC}" -c -maes -include meow_intrinsics.h -include meow_hash.h test.c + "${CC}" -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c # build & test - cmd.exe /C build.bat - build_clang/meow_test.exe @@ -50,7 +50,7 @@ matrix: script: # test if meow_hash.h compiles as C - touch test.c - - ${CC} -c -maes -include meow_intrinsics.h -include meow_hash.h test.c + - ${CC} -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c # build & test - ./build.sh - build/meow_test.exe @@ -75,7 +75,7 @@ matrix: script: # test if meow_hash.h compiles as C - touch test.c - - ${CC} -c -maes -include meow_intrinsics.h -include meow_hash.h test.c + - ${CC} -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c # build & test - ./build.sh - ./build/meow_test @@ -99,7 +99,7 @@ matrix: script: # test if meow_hash.h compiles as C - touch test.c - - ${CC} -c -maes -include meow_intrinsics.h -include meow_hash.h test.c + - ${CC} -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c # build & test - ./build.sh - ./build/meow_test @@ -149,7 +149,7 @@ matrix: script: # test if meow_hash.h compiles as C - touch test.c - - ${CC} -c -maes -include meow_intrinsics.h -include meow_hash.h test.c + - ${CC} -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c # build & test - ./build.sh - ./build/meow_test From c88894f633baadc762d94e1b58ee84ae82f84a6b Mon Sep 17 00:00:00 2001 From: Martins Mozeiko Date: Fri, 9 Nov 2018 23:13:49 -0800 Subject: [PATCH 3/4] Fixes ARMv8 build --- meow_hash.h | 10 +++++----- meow_intrinsics.h | 18 +++++++++++++----- more/meow_more.h | 16 ++++++++-------- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/meow_hash.h b/meow_hash.h index 65e364c..6ebe43b 100644 --- a/meow_hash.h +++ b/meow_hash.h @@ -206,17 +206,17 @@ MeowHash_Accelerated(meow_u64 Seed, meow_u64 TotalLengthInBytes, void *SourceIni Align = 0; } - meow_aes_128 PartialState = Meow128_Shuffle_Mem(Overhang - Align, &MeowShiftAdjust[Align]); + meow_u128 Partial = Meow128_Shuffle_Mem(Overhang - Align, &MeowShiftAdjust[Align]); - PartialState = Meow128_And_Mem( PartialState, &MeowMaskLen[16 - Len8] ); - S3 = Meow128_AESDEC(S3, PartialState); + Partial = Meow128_And_Mem( Partial, &MeowMaskLen[16 - Len8] ); + S3 = Meow128_AESDEC(S3, Partial); } else { // NOTE(casey): We don't have to do Jeff's heroics when we know the // buffer is aligned, since we cannot span a memory page (by definition). - meow_u128 PartialState = Meow128_And_Mem(*(meow_u128 *)Overhang, &MeowMaskLen[16 - Len8]); - S3 = Meow128_AESDEC(S3, PartialState); + meow_u128 Partial = Meow128_And_Mem(*(meow_u128 *)Overhang, &MeowMaskLen[16 - Len8]); + S3 = Meow128_AESDEC(S3, Partial); } } diff --git a/meow_intrinsics.h b/meow_intrinsics.h index aa85321..e6e62ae 100644 --- a/meow_intrinsics.h +++ b/meow_intrinsics.h @@ -95,14 +95,13 @@ #define Meow128_AESDEC_Finalize(A) (A) #define Meow128_Set64x2(Low64, High64) _mm_set_epi64x((High64), (Low64)) #define Meow128_Set64x2_State(Low64, High64) Meow128_Set64x2(Low64, High64) -#define Meow128_GetAESConstant(Ptr) (*(meow_u128 *)(Ptr)); +#define Meow128_GetAESConstant(Ptr) (*(meow_u128 *)(Ptr)) #define Meow128_And_Mem(A,B) _mm_and_si128((A),_mm_loadu_si128((meow_u128 *)(B))) #define Meow128_Shuffle_Mem(Mem,Control) _mm_shuffle_epi8(_mm_loadu_si128((meow_u128 *)(Mem)),_mm_loadu_si128((meow_u128 *)(Control))) // TODO(casey): Not sure if this should actually be Meow128_Zero(A) ((A) = _mm_setzero_si128()), maybe #define Meow128_Zero() _mm_setzero_si128() -#define Meow128_ZeroState() Meow128_Zero() #define Meow256_AESDEC(Prior, XOr) _mm256_aesdec_epi128((Prior), (XOr)) #define Meow256_AESDEC_Mem(Prior, XOr) _mm256_aesdec_epi128((Prior), *(meow_u256 *)(XOr)) @@ -141,8 +140,11 @@ typedef struct { meow_u128 B; } meow_aes_128; +#define MeowU32From(A, I) (vgetq_lane_u32(vreinterpretq_u32_u8((A)), (I))) +#define MeowU64From(A, I) (vgetq_lane_u64(vreinterpretq_u64_u8((A)), (I))) + static int -Meow128_AreEqual(meow_u128 A, meow_u128 B) +MeowHashesAreEqualImpl(meow_u128 A, meow_u128 B) { uint8x16_t Powers = { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, @@ -157,6 +159,8 @@ Meow128_AreEqual(meow_u128 A, meow_u128 B) return Output == 0xFFFF; } +#define MeowHashesAreEqual(A, B) MeowHashesAreEqualImpl((A), (B)) + static meow_aes_128 Meow128_AESDEC(meow_aes_128 Prior, meow_u128 Xor) { @@ -190,10 +194,11 @@ Meow128_Zero() } static meow_aes_128 -Meow128_ZeroState() +Meow128_GetAESConstant(const meow_u8 *Ptr) { meow_aes_128 R; - R.A = R.B = vdupq_n_u8(0); + R.A = vld1q_u8(Ptr); + R.B = vdupq_n_u8(0); return(R); } @@ -213,6 +218,9 @@ Meow128_Set64x2_State(meow_u64 Low64, meow_u64 High64) return(R); } +#define Meow128_And_Mem(A,B) vandq_u8((A), vld1q_u8((meow_u8 *)B)) +#define Meow128_Shuffle_Mem(Mem,Control) vqtbl1q_u8(vld1q_u8((meow_u8 *)(Mem)),vld1q_u8((meow_u8 *)(Control))) + #endif #define MEOW_HASH_VERSION 4 diff --git a/more/meow_more.h b/more/meow_more.h index f29e429..5a16e05 100644 --- a/more/meow_more.h +++ b/more/meow_more.h @@ -39,10 +39,10 @@ MeowHashBegin(meow_hash_state *State) static void MeowHashAbsorbBlocks(meow_hash_state *State, meow_u64 BlockCount, meow_u8 *Source) { - meow_u128 S0 = State->S0; - meow_u128 S1 = State->S1; - meow_u128 S2 = State->S2; - meow_u128 S3 = State->S3; + meow_aes_128 S0 = State->S0; + meow_aes_128 S1 = State->S1; + meow_aes_128 S2 = State->S2; + meow_aes_128 S3 = State->S3; while(BlockCount--) { @@ -138,11 +138,11 @@ MeowHashEnd(meow_hash_state *State, meow_u64 Seed) { Align = 0; } + + meow_u128 Partial = Meow128_Shuffle_Mem(Source - Align, &MeowShiftAdjust[Align]); - meow_aes_128 PartialState = Meow128_Shuffle_Mem(Source - Align, &MeowShiftAdjust[Align]); - - PartialState = Meow128_And_Mem( PartialState, &MeowMaskLen[16 - Len] ); - S3 = Meow128_AESDEC(S3, PartialState); + Partial = Meow128_And_Mem( Partial, &MeowMaskLen[16 - Len] ); + S3 = Meow128_AESDEC(S3, Partial); } meow_u128 Mixer = Meow128_Set64x2(Seed - State->TotalLengthInBytes, From bc7f41b330510a9e12b023a5cb82c74825561b9b Mon Sep 17 00:00:00 2001 From: Martins Mozeiko Date: Fri, 9 Nov 2018 23:48:35 -0800 Subject: [PATCH 4/4] Reduce max size for benchmark on ARMv8 --- more/meow_bench.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/more/meow_bench.cpp b/more/meow_bench.cpp index 87539c3..e36bc1b 100644 --- a/more/meow_bench.cpp +++ b/more/meow_bench.cpp @@ -75,7 +75,11 @@ struct input_size_test meow_u64 Size; }; +#ifdef __aarch64__ +#define MAX_SIZE_TO_TEST Gb(1) +#else #define MAX_SIZE_TO_TEST Gb(2) +#endif #define SIZE_TYPE_COUNT 64 #define SIZE_COUNT_PER_BATCH 16 struct input_size_tests