diff --git a/library/aesce.c b/library/aesce.c
index 129867cb15f..1d30228f02c 100644
--- a/library/aesce.c
+++ b/library/aesce.c
@@ -115,6 +115,84 @@ int mbedtls_aesce_crypt_ecb( mbedtls_aes_context *ctx,
     return( 0 );
 }
 
+static inline uint8x16_t pmull_low( uint8x16_t a, uint8x16_t b )
+{
+    /* Carry-less 64x64 -> 128-bit multiply of the low halves of a and b. */
+    const poly64_t lo_a = (poly64_t)vget_low_p64( vreinterpretq_p64_u8( a ) );
+    const poly64_t lo_b = (poly64_t)vget_low_p64( vreinterpretq_p64_u8( b ) );
+    return( vreinterpretq_u8_p128( vmull_p64( lo_a, lo_b ) ) );
+}
+
+static inline uint8x16_t pmull_high( uint8x16_t a, uint8x16_t b )
+{   /* Carry-less 64x64 -> 128-bit multiply of the high halves of a and b. */
+    const poly128_t product = vmull_high_p64( vreinterpretq_p64_u8( a ),
+                                              vreinterpretq_p64_u8( b ) );
+    return( vreinterpretq_u8_p128( product ) );
+}
+
+static inline uint8x16x2_t ghash_mult_128( uint8x16_t GH, uint8x16_t HK )
+{
+    /* Unreduced 256-bit carry-less product of GH and HK, built from four
+     * 64x64 multiplies: val[0] holds the low 128 bits, val[1] the high 128. */
+    const uint8x16_t zero = vdupq_n_u8( 0 );
+    const uint8x16_t HK_swapped = vextq_u8( HK, HK, 8 );
+    uint8x16x2_t product;
+    uint8x16_t cross;
+
+    /* Outer terms: GH.lo * HK.lo and GH.hi * HK.hi. */
+    product.val[0] = pmull_low( GH, HK );
+    product.val[1] = pmull_high( GH, HK );
+
+    /* Middle term: GH.lo * HK.hi ^ GH.hi * HK.lo, via the swapped HK. */
+    cross = veorq_u8( pmull_high( GH, HK_swapped ),
+                      pmull_low( GH, HK_swapped ) );
+
+    /* Add the middle term at a 64-bit offset: low half into the upper half
+     * of val[0], high half into the lower half of val[1]. */
+    product.val[0] = veorq_u8( product.val[0], vextq_u8( zero, cross, 8 ) );
+    product.val[1] = veorq_u8( product.val[1], vextq_u8( cross, zero, 8 ) );
+    return( product );
+}
+
+static inline uint8x16_t ghash_mult_rdc( uint8x16x2_t in )
+{
+    /* Reduce the 256-bit in.val[1]:in.val[0] modulo the GCM polynomial.
+     * x^128 = x^7 + x^2 + x + 1 (0x87), so high words fold in times 0x87. */
+    const uint8x16_t zero = vdupq_n_u8( 0 );
+    const uint8x16_t poly = vreinterpretq_u8_u64( vdupq_n_u64( 0x87 ) );
+    uint8x16_t lo = in.val[0];
+    uint8x16_t hi = in.val[1];
+    uint8x16_t fold;
+
+    /* Fold the upper 64 bits of hi; the product is added at bit offset 64. */
+    fold = pmull_high( hi, poly );
+    hi = veorq_u8( hi, vextq_u8( fold, zero, 8 ) );
+    lo = veorq_u8( lo, vextq_u8( zero, fold, 8 ) );
+
+    /* Fold the lower 64 bits of the updated hi straight into lo. */
+    fold = pmull_low( hi, poly );
+    return( veorq_u8( lo, fold ) );
+}
+
+static inline uint8x16_t ghash_mult(uint8x16_t GH_in, uint8x16_t HK_in)
+{   /* 256-bit carry-less product of the inputs, reduced back to 128 bits. */
+    return( ghash_mult_rdc( ghash_mult_128( GH_in, HK_in ) ) );
+}
+
+/*
+ * GCM multiplication: c = a times b in GF(2^128). c may alias a or b.
+ */
+void mbedtls_aesce_gcm_mult( unsigned char c[16],
+                             const unsigned char a[16],
+                             const unsigned char b[16] )
+{
+    /* vld1q_u8/vst1q_u8 accept unaligned byte pointers and keep a and b
+     * const. vrbitq_u8 reverses the bits in each byte, converting between
+     * GCM's bit order and the order the PMULL-based ghash_mult() works in. */
+    const uint8x16_t va = vrbitq_u8( vld1q_u8( a ) );
+    const uint8x16_t vb = vrbitq_u8( vld1q_u8( b ) );
+    vst1q_u8( c, vrbitq_u8( ghash_mult( va, vb ) ) );
+}
 
 /*
  * Compute decryption round keys from encryption round keys
diff --git a/library/aesce.h b/library/aesce.h
index 16adf9fe684..1fb3f8087f9 100644
--- a/library/aesce.h
+++ b/library/aesce.h
@@ -70,6 +70,22 @@ int mbedtls_aesce_crypt_ecb( mbedtls_aes_context *ctx,
                              const unsigned char input[16],
                              unsigned char output[16] );
 
+/**
+ * \brief          Internal GCM multiplication: c = a * b in GF(2^128)
+ *
+ * \note           This function is only for internal use by other library
+ *                 functions; you must not call it directly.
+ *
+ * \param c        Output buffer (16 bytes) that receives the product
+ * \param a        First operand (16 bytes, read-only)
+ * \param b        Second operand (16 bytes, read-only)
+ *
+ * \note           Both operands and result are bit strings interpreted as
+ *                 elements of GF(2^128) as per the GCM spec.
+ */
+void mbedtls_aesce_gcm_mult( unsigned char c[16],
+                             const unsigned char a[16],
+                             const unsigned char b[16] );
 
 /**
  * \brief           Internal round key inversion. This function computes
diff --git a/library/gcm.c b/library/gcm.c
index 0178b5ba755..55fb365ae4d 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -42,6 +42,10 @@
 #include "aesni.h"
 #endif
 
+#if defined(MBEDTLS_AESCE_C)
+#include "aesce.h"
+#endif
+
 #if !defined(MBEDTLS_GCM_ALT)
 
 /*
@@ -91,6 +95,11 @@ static int gcm_gen_table( mbedtls_gcm_context *ctx )
         return( 0 );
 #endif
 
+#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
+    if( mbedtls_aesce_has_support( ) )
+        return( 0 );
+#endif
+
     /* 0 corresponds to 0 in GF(2^128) */
     ctx->HH[0] = 0;
     ctx->HL[0] = 0;
@@ -194,6 +203,21 @@ static void gcm_mult( mbedtls_gcm_context *ctx, const unsigned char x[16],
     }
 #endif /* MBEDTLS_AESNI_C && MBEDTLS_HAVE_X86_64 */
 
+#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
+    if( mbedtls_aesce_has_support( ) )
+    {
+        unsigned char h[16];
+
+        MBEDTLS_PUT_UINT32_BE( ctx->HH[8] >> 32, h,  0 );
+        MBEDTLS_PUT_UINT32_BE( ctx->HH[8],       h,  4 );
+        MBEDTLS_PUT_UINT32_BE( ctx->HL[8] >> 32, h,  8 );
+        MBEDTLS_PUT_UINT32_BE( ctx->HL[8],       h, 12 );
+
+        mbedtls_aesce_gcm_mult( output, x, h );
+        return;
+    }
+#endif
+
     lo = x[15] & 0xf;
 
     zh = ctx->HH[lo];