diff --git a/library/aesce.c b/library/aesce.c
index 129867cb15f..1d30228f02c 100644
--- a/library/aesce.c
+++ b/library/aesce.c
@@ -115,6 +115,99 @@ int mbedtls_aesce_crypt_ecb( mbedtls_aes_context *ctx,
     return( 0 );
 }
 
+/* Carry-less multiply of the low 64-bit halves of a and b. */
+static inline uint8x16_t pmull_low( uint8x16_t a, uint8x16_t b )
+{
+    return( vreinterpretq_u8_p128(
+                vmull_p64(
+                    (poly64_t)vget_low_p64( vreinterpretq_p64_u8( a ) ),
+                    (poly64_t)vget_low_p64( vreinterpretq_p64_u8( b ) ) ) ) );
+}
+
+/* Carry-less multiply of the high 64-bit halves of a and b. */
+static inline uint8x16_t pmull_high( uint8x16_t a, uint8x16_t b )
+{
+    return( vreinterpretq_u8_p128(
+                vmull_high_p64( vreinterpretq_p64_u8( a ),
+                                vreinterpretq_p64_u8( b ) ) ) );
+}
+
+/*
+ * Unreduced carry-less product of two 128-bit values.
+ * val[0] receives the low 128 bits of the result, val[1] the high 128 bits.
+ */
+static inline uint8x16x2_t ghash_mult_128( uint8x16_t GH, uint8x16_t HK )
+{
+    uint8x16_t r0, r1, a, b, t0, t1;
+    uint8x16x2_t ret;
+
+    const uint8x16_t z = vdupq_n_u8( 0 );
+    a = GH;
+    b = HK;
+
+    r0 = pmull_low( a, b );       /* a.lo * b.lo */
+    r1 = pmull_high( a, b );      /* a.hi * b.hi */
+    t0 = vextq_u8( b, b, 8 );     /* b with its halves swapped */
+    t1 = pmull_low( a, t0 );      /* a.lo * b.hi */
+    t0 = pmull_high( a, t0 );     /* a.hi * b.lo */
+    t0 = veorq_u8( t0, t1 );      /* middle term */
+    t1 = vextq_u8( z, t0, 8 );    /* low half of middle term, moved up */
+    r0 = veorq_u8( r0, t1 );
+    t1 = vextq_u8( t0, z, 8 );    /* high half of middle term, moved down */
+    r1 = veorq_u8( r1, t1 );
+    ret.val[0] = r0;
+    ret.val[1] = r1;
+    return( ret );
+}
+
+/*
+ * Fold the 256-bit value in (val[0] low, val[1] high) down to 128 bits
+ * using the constant 0x87, i.e. x^7 + x^2 + x + 1.
+ */
+static inline uint8x16_t ghash_mult_rdc( uint8x16x2_t in )
+{
+    uint8x16_t a0, a1, t0, t1;
+
+    const uint8x16_t p = vreinterpretq_u8_u64( vdupq_n_u64( 0x87 ) );
+    const uint8x16_t zero = vdupq_n_u8( 0 );
+    a0 = in.val[0];
+    a1 = in.val[1];
+
+    /* polynomial reduction */
+    t0 = pmull_high( a1, p );
+    t1 = vextq_u8( t0, zero, 8 );
+    a1 = veorq_u8( a1, t1 );
+    t1 = vextq_u8( zero, t0, 8 );
+    a0 = veorq_u8( a0, t1 );
+    t0 = pmull_low( a1, p );
+
+    return( veorq_u8( a0, t0 ) );
+}
+
+/* Multiply in GF(2^128): full product followed by reduction. */
+static inline uint8x16_t ghash_mult( uint8x16_t GH_in, uint8x16_t HK_in )
+{
+    return( ghash_mult_rdc( ghash_mult_128( GH_in, HK_in ) ) );
+}
+
+/*
+ * GCM multiplication: c = a times b in GF(2^128)
+ *
+ * The buffers are accessed with vld1q_u8()/vst1q_u8(), which take plain
+ * byte pointers, so they need no particular alignment and the const
+ * qualifier on a and b is respected. Both operands are loaded before
+ * the result is stored, so c may alias a or b.
+ */
+void mbedtls_aesce_gcm_mult( unsigned char c[16],
+                             const unsigned char a[16],
+                             const unsigned char b[16] )
+{
+    /* vrbitq_u8() reverses the bits within each byte on the way in and out. */
+    const uint8x16_t va = vrbitq_u8( vld1q_u8( a ) );
+    const uint8x16_t vb = vrbitq_u8( vld1q_u8( b ) );
+
+    vst1q_u8( c, vrbitq_u8( ghash_mult( va, vb ) ) );
+}
 
 /*
  * Compute decryption round keys from encryption round keys
diff --git a/library/aesce.h b/library/aesce.h
index 16adf9fe684..1fb3f8087f9 100644
--- a/library/aesce.h
+++ b/library/aesce.h
@@ -70,6 +70,22 @@ int mbedtls_aesce_crypt_ecb( mbedtls_aes_context *ctx,
                              const unsigned char input[16],
                              unsigned char output[16] );
 
+/**
+ * \brief          Internal GCM multiplication: c = a * b in GF(2^128)
+ *
+ * \note           This function is only for internal use by other library
+ *                 functions; you must not call it directly.
+ *
+ * \param c        Result
+ * \param a        First operand
+ * \param b        Second operand
+ *
+ * \note           Both operands and result are bit strings interpreted as
+ *                 elements of GF(2^128) as per the GCM spec.
+ */
+void mbedtls_aesce_gcm_mult( unsigned char c[16],
+                             const unsigned char a[16],
+                             const unsigned char b[16] );
 
 /**
  * \brief           Internal round key inversion. This function computes
diff --git a/library/gcm.c b/library/gcm.c
index 0178b5ba755..55fb365ae4d 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -42,6 +42,10 @@
 #include "aesni.h"
 #endif
 
+#if defined(MBEDTLS_AESCE_C)
+#include "aesce.h"
+#endif
+
 #if !defined(MBEDTLS_GCM_ALT)
 
 /*
@@ -91,6 +95,11 @@ static int gcm_gen_table( mbedtls_gcm_context *ctx )
         return( 0 );
 #endif
 
+#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
+    if( mbedtls_aesce_has_support( ) )
+        return( 0 );
+#endif
+
     /* 0 corresponds to 0 in GF(2^128) */
     ctx->HH[0] = 0;
     ctx->HL[0] = 0;
@@ -194,6 +203,21 @@ static void gcm_mult( mbedtls_gcm_context *ctx, const unsigned char x[16],
     }
 #endif /* MBEDTLS_AESNI_C && MBEDTLS_HAVE_X86_64 */
 
+#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
+    if( mbedtls_aesce_has_support( ) )
+    {
+        unsigned char h[16];
+
+        MBEDTLS_PUT_UINT32_BE( ctx->HH[8] >> 32, h, 0 );
+        MBEDTLS_PUT_UINT32_BE( ctx->HH[8], h, 4 );
+        MBEDTLS_PUT_UINT32_BE( ctx->HL[8] >> 32, h, 8 );
+        MBEDTLS_PUT_UINT32_BE( ctx->HL[8], h, 12 );
+
+        mbedtls_aesce_gcm_mult( output, x, h );
+        return;
+    }
+#endif
+
     lo = x[15] & 0xf;
 
     zh = ctx->HH[lo];