Skip to content

Commit

Permalink
Merge pull request #42 from mmozeiko/v0.4
Browse files Browse the repository at this point in the history
Fixes for v0.4
  • Loading branch information
cmuratori authored Nov 11, 2018
2 parents cc046c7 + bc7f41b commit ed8c632
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 23 deletions.
10 changes: 5 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ matrix:
# test if meow_hash.h compiles as C
- touch test.c
- |-
"${CC}" -c -maes -include meow_intrinsics.h -include meow_hash.h test.c
"${CC}" -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c
# build & test
- cmd.exe /C build.bat
- build_clang/meow_test.exe
Expand All @@ -50,7 +50,7 @@ matrix:
script:
# test if meow_hash.h compiles as C
- touch test.c
- ${CC} -c -maes -include meow_intrinsics.h -include meow_hash.h test.c
- ${CC} -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c
# build & test
- ./build.sh
- build/meow_test.exe
Expand All @@ -75,7 +75,7 @@ matrix:
script:
# test if meow_hash.h compiles as C
- touch test.c
- ${CC} -c -maes -include meow_intrinsics.h -include meow_hash.h test.c
- ${CC} -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c
# build & test
- ./build.sh
- ./build/meow_test
Expand All @@ -99,7 +99,7 @@ matrix:
script:
# test if meow_hash.h compiles as C
- touch test.c
- ${CC} -c -maes -include meow_intrinsics.h -include meow_hash.h test.c
- ${CC} -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c
# build & test
- ./build.sh
- ./build/meow_test
Expand Down Expand Up @@ -149,7 +149,7 @@ matrix:
script:
# test if meow_hash.h compiles as C
- touch test.c
- ${CC} -c -maes -include meow_intrinsics.h -include meow_hash.h test.c
- ${CC} -c -mssse3 -maes -include meow_intrinsics.h -include meow_hash.h test.c
# build & test
- ./build.sh
- ./build/meow_test
Expand Down
10 changes: 5 additions & 5 deletions meow_hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,17 +206,17 @@ MeowHash_Accelerated(meow_u64 Seed, meow_u64 TotalLengthInBytes, void *SourceIni
Align = 0;
}

meow_aes_128 PartialState = Meow128_Shuffle_Mem(Overhang - Align, &MeowShiftAdjust[Align]);
meow_u128 Partial = Meow128_Shuffle_Mem(Overhang - Align, &MeowShiftAdjust[Align]);

PartialState = Meow128_And_Mem( PartialState, &MeowMaskLen[16 - Len8] );
S3 = Meow128_AESDEC(S3, PartialState);
Partial = Meow128_And_Mem( Partial, &MeowMaskLen[16 - Len8] );
S3 = Meow128_AESDEC(S3, Partial);
}
else
{
// NOTE(casey): We don't have to do Jeff's heroics when we know the
// buffer is aligned, since we cannot span a memory page (by definition).
meow_u128 PartialState = Meow128_And_Mem(*(meow_u128 *)Overhang, &MeowMaskLen[16 - Len8]);
S3 = Meow128_AESDEC(S3, PartialState);
meow_u128 Partial = Meow128_And_Mem(*(meow_u128 *)Overhang, &MeowMaskLen[16 - Len8]);
S3 = Meow128_AESDEC(S3, Partial);
}
}

Expand Down
19 changes: 14 additions & 5 deletions meow_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,13 @@
#define Meow128_AESDEC_Finalize(A) (A)
#define Meow128_Set64x2(Low64, High64) _mm_set_epi64x((High64), (Low64))
#define Meow128_Set64x2_State(Low64, High64) Meow128_Set64x2(Low64, High64)
#define Meow128_GetAESConstant(Ptr) (*(meow_u128 *)(Ptr));
#define Meow128_GetAESConstant(Ptr) (*(meow_u128 *)(Ptr))

#define Meow128_And_Mem(A,B) _mm_and_si128((A),_mm_loadu_si128((meow_u128 *)(B)))
#define Meow128_Shuffle_Mem(Mem,Control) _mm_shuffle_epi8(_mm_loadu_si128((meow_u128 *)(Mem)),_mm_loadu_si128((meow_u128 *)(Control)))

// TODO(casey): Not sure if this should actually be Meow128_Zero(A) ((A) = _mm_setzero_si128()), maybe
#define Meow128_Zero() _mm_setzero_si128()
#define Meow128_ZeroState() Meow128_Zero()

#define Meow256_AESDEC(Prior, XOr) _mm256_aesdec_epi128((Prior), (XOr))
#define Meow256_AESDEC_Mem(Prior, XOr) _mm256_aesdec_epi128((Prior), *(meow_u256 *)(XOr))
Expand Down Expand Up @@ -141,8 +140,11 @@ typedef struct {
meow_u128 B;
} meow_aes_128;

// Reinterprets the u8-lane vector A as u32 lanes and returns lane I.
// NOTE(review): NEON lane-extract intrinsics generally need I to be a compile-time constant — confirm at call sites.
#define MeowU32From(A, I) (vgetq_lane_u32(vreinterpretq_u32_u8((A)), (I)))
// Reinterprets the u8-lane vector A as u64 lanes and returns lane I.
#define MeowU64From(A, I) (vgetq_lane_u64(vreinterpretq_u64_u8((A)), (I)))

static int
Meow128_AreEqual(meow_u128 A, meow_u128 B)
MeowHashesAreEqualImpl(meow_u128 A, meow_u128 B)
{
uint8x16_t Powers = {
1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128,
Expand All @@ -157,6 +159,8 @@ Meow128_AreEqual(meow_u128 A, meow_u128 B)
return Output == 0xFFFF;
}

// Forwards both (parenthesized) arguments unchanged to MeowHashesAreEqualImpl and yields its result.
#define MeowHashesAreEqual(A, B) MeowHashesAreEqualImpl((A), (B))

static meow_aes_128
Meow128_AESDEC(meow_aes_128 Prior, meow_u128 Xor)
{
Expand Down Expand Up @@ -190,10 +194,11 @@ Meow128_Zero()
}

static meow_aes_128
Meow128_ZeroState()
Meow128_GetAESConstant(const meow_u8 *Ptr)
{
meow_aes_128 R;
R.A = R.B = vdupq_n_u8(0);
R.A = vld1q_u8(Ptr);
R.B = vdupq_n_u8(0);
return(R);
}

Expand All @@ -213,6 +218,9 @@ Meow128_Set64x2_State(meow_u64 Low64, meow_u64 High64)
return(R);
}

// Bitwise AND of vector A with the 16 bytes loaded from address B.
// B is parenthesized inside the cast so that an expression argument such as
// `Ptr + 1` is cast as a whole; without the parentheses the cast would bind to
// `Ptr` alone and the offset would be applied in bytes instead of elements.
#define Meow128_And_Mem(A,B) vandq_u8((A), vld1q_u8((meow_u8 *)(B)))
// Loads 16 bytes from Mem and 16 index bytes from Control, then performs a
// byte-wise table lookup of the former using the latter.
#define Meow128_Shuffle_Mem(Mem,Control) vqtbl1q_u8(vld1q_u8((meow_u8 *)(Mem)),vld1q_u8((meow_u8 *)(Control)))

#endif

#define MEOW_HASH_VERSION 4
Expand Down Expand Up @@ -245,6 +253,7 @@ typedef meow_u128 meow_hash;

#endif

// Forward declaration: lets the function types below refer to the state struct
// by pointer without the struct's definition being visible at this point.
typedef struct meow_hash_state meow_hash_state;
// Function type taking a seed, a length and a source pointer, and returning a meow_hash.
typedef meow_hash meow_hash_implementation(meow_u64 Seed, meow_u64 Len, void *Source);
// Function type taking a state pointer, a length and a source pointer; returns nothing.
typedef void meow_absorb_implementation(struct meow_hash_state *State, meow_u64 Len, void *Source);

Expand Down
4 changes: 4 additions & 0 deletions more/meow_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,11 @@ struct input_size_test
meow_u64 Size;
};

// MAX_SIZE_TO_TEST is Gb(1) when compiling for AArch64 (__aarch64__ defined)
// and Gb(2) for every other target.
// NOTE(review): the reason for the smaller AArch64 value is not visible here — confirm.
#ifdef __aarch64__
#define MAX_SIZE_TO_TEST Gb(1)
#else
#define MAX_SIZE_TO_TEST Gb(2)
#endif
#define SIZE_TYPE_COUNT 64
#define SIZE_COUNT_PER_BATCH 16
struct input_size_tests
Expand Down
16 changes: 8 additions & 8 deletions more/meow_more.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ MeowHashBegin(meow_hash_state *State)
static void
MeowHashAbsorbBlocks(meow_hash_state *State, meow_u64 BlockCount, meow_u8 *Source)
{
meow_u128 S0 = State->S0;
meow_u128 S1 = State->S1;
meow_u128 S2 = State->S2;
meow_u128 S3 = State->S3;
meow_aes_128 S0 = State->S0;
meow_aes_128 S1 = State->S1;
meow_aes_128 S2 = State->S2;
meow_aes_128 S3 = State->S3;

while(BlockCount--)
{
Expand Down Expand Up @@ -138,11 +138,11 @@ MeowHashEnd(meow_hash_state *State, meow_u64 Seed)
{
Align = 0;
}

meow_u128 Partial = Meow128_Shuffle_Mem(Source - Align, &MeowShiftAdjust[Align]);

meow_aes_128 PartialState = Meow128_Shuffle_Mem(Source - Align, &MeowShiftAdjust[Align]);

PartialState = Meow128_And_Mem( PartialState, &MeowMaskLen[16 - Len] );
S3 = Meow128_AESDEC(S3, PartialState);
Partial = Meow128_And_Mem( Partial, &MeowMaskLen[16 - Len] );
S3 = Meow128_AESDEC(S3, Partial);
}

meow_u128 Mixer = Meow128_Set64x2(Seed - State->TotalLengthInBytes,
Expand Down

0 comments on commit ed8c632

Please sign in to comment.