Skip to content

Commit

Permalink
crypto: x86/chacha - expose SIMD ChaCha routine as library function
Browse files Browse the repository at this point in the history
Wire the existing x86 SIMD ChaCha code into the new ChaCha library
interface, so that users of the library interface will get the
accelerated version when available.

Given that calls into the library API will always go through the
routines in this module if it is enabled, switch to static keys
to select the optimal implementation available (which may be none
at all, in which case we defer to the generic implementation for
all invocations).

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
  • Loading branch information
ardbiesheuvel authored and herbertx committed Nov 17, 2019
1 parent 28e8d89 commit 84e03fa
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 25 deletions.
91 changes: 66 additions & 25 deletions arch/x86/crypto/chacha_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
#ifdef CONFIG_AS_AVX2

asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
static bool chacha_use_avx2;
#ifdef CONFIG_AS_AVX512

asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
static bool chacha_use_avx512vl;
#endif
#endif

static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);

static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
{
Expand All @@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
#ifdef CONFIG_AS_AVX2
#ifdef CONFIG_AS_AVX512
if (chacha_use_avx512vl) {
if (IS_ENABLED(CONFIG_AS_AVX512) &&
static_branch_likely(&chacha_use_avx512vl)) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
chacha_8block_xor_avx512vl(state, dst, src, bytes,
nrounds);
Expand Down Expand Up @@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
return;
}
}
#endif
if (chacha_use_avx2) {

if (IS_ENABLED(CONFIG_AS_AVX2) &&
static_branch_likely(&chacha_use_avx2)) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 8;
Expand All @@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
return;
}
}
#endif

while (bytes >= CHACHA_BLOCK_SIZE * 4) {
chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 4;
Expand All @@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
}
}

void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);

if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
hchacha_block_generic(state, stream, nrounds);
} else {
kernel_fpu_begin();
hchacha_block_ssse3(state, stream, nrounds);
kernel_fpu_end();
}
}
EXPORT_SYMBOL(hchacha_block_arch);

void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);

chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);

void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
int nrounds)
{
state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);

if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
bytes <= CHACHA_BLOCK_SIZE)
return chacha_crypt_generic(state, dst, src, bytes, nrounds);

kernel_fpu_begin();
chacha_dosimd(state, dst, src, bytes, nrounds);
kernel_fpu_end();
}
EXPORT_SYMBOL(chacha_crypt_arch);

static int chacha_simd_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
Expand All @@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = round_down(nbytes, walk.stride);

if (!crypto_simd_usable()) {
if (!static_branch_likely(&chacha_use_simd) ||
!crypto_simd_usable()) {
chacha_crypt_generic(state, walk.dst.virt.addr,
walk.src.virt.addr, nbytes,
ctx->nrounds);
Expand Down Expand Up @@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = {
static int __init chacha_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_SSSE3))
return -ENODEV;

#ifdef CONFIG_AS_AVX2
chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifdef CONFIG_AS_AVX512
chacha_use_avx512vl = chacha_use_avx2 &&
boot_cpu_has(X86_FEATURE_AVX512VL) &&
boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
#endif
#endif
return 0;

static_branch_enable(&chacha_use_simd);

if (IS_ENABLED(CONFIG_AS_AVX2) &&
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
static_branch_enable(&chacha_use_avx2);

if (IS_ENABLED(CONFIG_AS_AVX512) &&
boot_cpu_has(X86_FEATURE_AVX512VL) &&
boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
static_branch_enable(&chacha_use_avx512vl);
}
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}

Expand Down
1 change: 1 addition & 0 deletions crypto/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -1437,6 +1437,7 @@ config CRYPTO_CHACHA20_X86_64
depends on X86 && 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_LIB_CHACHA_GENERIC
select CRYPTO_ARCH_HAVE_LIB_CHACHA
help
SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
XChaCha20, and XChaCha12 stream ciphers.
Expand Down
6 changes: 6 additions & 0 deletions include/crypto/chacha.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
#define CHACHA_BLOCK_SIZE 64
#define CHACHAPOLY_IV_SIZE 12

#ifdef CONFIG_X86_64
#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
#else
#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))
#endif

/* 192-bit nonce, then 64-bit stream position */
#define XCHACHA_IV_SIZE 32

Expand Down

0 comments on commit 84e03fa

Please sign in to comment.