-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
arm64/crypto: SHA-1 using ARMv8 Crypto Extensions
This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs that have support for the SHA-1 part of the ARM v8 Crypto Extensions. Change-Id: I29fafd308e17aff6e0d59938c106fae6ad7fe78e Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Herbert Xu <herbert@gondor.apana.org.au> Conflicts: arch/arm64/Makefile
- Loading branch information
Showing
6 changed files
with
359 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
|
||
menuconfig ARM64_CRYPTO | ||
bool "ARM64 Accelerated Cryptographic Algorithms" | ||
depends on ARM64 | ||
help | ||
Say Y here to choose from a selection of cryptographic algorithms | ||
implemented using ARM64 specific CPU features or instructions. | ||
|
||
if ARM64_CRYPTO | ||
|
||
config CRYPTO_SHA1_ARM64_CE | ||
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" | ||
depends on ARM64 && KERNEL_MODE_NEON | ||
select CRYPTO_HASH | ||
|
||
endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# | ||
# linux/arch/arm64/crypto/Makefile | ||
# | ||
# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
# | ||
# This program is free software; you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License version 2 as | ||
# published by the Free Software Foundation. | ||
# | ||
|
||
obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o | ||
sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
/* | ||
* sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions | ||
* | ||
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License version 2 as | ||
* published by the Free Software Foundation. | ||
*/ | ||
|
||
#include <linux/linkage.h> | ||
#include <asm/assembler.h> | ||
|
||
.text | ||
.arch armv8-a+crypto | ||
|
||
k0 .req v0 | ||
k1 .req v1 | ||
k2 .req v2 | ||
k3 .req v3 | ||
|
||
t0 .req v4 | ||
t1 .req v5 | ||
|
||
dga .req q6 | ||
dgav .req v6 | ||
dgb .req s7 | ||
dgbv .req v7 | ||
|
||
dg0q .req q12 | ||
dg0s .req s12 | ||
dg0v .req v12 | ||
dg1s .req s13 | ||
dg1v .req v13 | ||
dg2s .req s14 | ||
|
||
/*
 * add_only - perform one group of SHA-1 rounds on the working digest dg0q
 * without touching the message schedule.
 *
 *   op   suffix of the hash instruction to issue: c, p or m
 *        (giving sha1c, sha1p or sha1m)
 *   ev   the literal "ev" selects the even variant; anything else (callers
 *        pass "od") selects the odd variant
 *   rc   vector holding the round constant to add to schedule vector v<s0>
 *   s0   number of the schedule register whose sum is prepared for the
 *        FOLLOWING step; may be omitted on an odd step, in which case no
 *        sum is prepared
 *   dg1  optional override for the second operand of sha1<op> on an even
 *        step (defaults to dg1s)
 *
 * Even and odd steps ping-pong between two sets of temporaries:
 *   even: consumes t0 and dg1s (or dg1), produces t1 and dg2s
 *   odd:  consumes t1 and dg2s,          produces t0 and dg1s
 * so the two variants must strictly alternate, starting with an even step
 * for which the caller has already set up t0.
 */
.macro add_only, op, ev, rc, s0, dg1 | ||
.ifc \ev, ev | ||
add t1.4s, v\s0\().4s, \rc\().4s | ||
sha1h dg2s, dg0s | ||
.ifnb \dg1 | ||
sha1\op dg0q, \dg1, t0.4s | ||
.else | ||
sha1\op dg0q, dg1s, t0.4s | ||
.endif | ||
.else | ||
.ifnb \s0 | ||
add t0.4s, v\s0\().4s, \rc\().4s | ||
.endif | ||
sha1h dg1s, dg0s | ||
sha1\op dg0q, dg2s, t1.4s | ||
.endif | ||
.endm | ||
|
||
/*
 * add_update - same as add_only, but additionally advances the message
 * schedule held in v<s0>..v<s3>.
 *
 * sha1su0 is issued on v<s0> (using v<s1> and v<s2>) before the rounds and
 * sha1su1 on v<s0> (using v<s3>) after them, so v<s0> is rewritten in place.
 * Note that the register forwarded to add_only as its "s0" argument is s1,
 * not s0: the sum prepared for the next step uses the schedule register that
 * this invocation does not modify.
 */
.macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 | ||
sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s | ||
add_only \op, \ev, \rc, \s1, \dg1 | ||
sha1su1 v\s0\().4s, v\s3\().4s | ||
.endm | ||
|
||
/* | ||
* The SHA1 round constants | ||
*/ | ||
.align 4 | ||
.Lsha1_rcon: | ||
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 | ||
|
||
/*
 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
 *			  u8 *head, long bytes)
 *
 *   w0 (blocks)  number of 64-byte blocks to consume from src
 *   x1 (src)     input data, advanced by 64 for every block consumed
 *   x2 (state)   five 32-bit digest words, updated in place
 *   x3 (head)    optional 64-byte block that is processed before anything
 *		  is read from src; NULL if there is none
 *   x4 (bytes)   if non-zero, total message length in bytes: a final
 *		  padding block carrying the bit count is processed as well
 *
 * When head is NULL the first block is loaded from src unconditionally, so
 * blocks must be at least 1 in that case.
 *
 * Uses v0-v14, x6-x8 and the registers holding the arguments.
 */
ENTRY(sha1_ce_transform)
	/* load round constants, each replicated across all four lanes */
	adr		x6, .Lsha1_rcon
	ld1r		{k0.4s}, [x6], #4
	ld1r		{k1.4s}, [x6], #4
	ld1r		{k2.4s}, [x6], #4
	ld1r		{k3.4s}, [x6]

	/*
	 * load state
	 *
	 * The state is an array of 32-bit words, so it must be accessed with
	 * an element-wise ld1/st1 {.4s}. A 128-bit scalar ldr/str of q6 would
	 * reverse the order of the four words in the vector lanes on a
	 * big-endian kernel.
	 */
	ld1		{dgav.4s}, [x2]
	ldr		dgb, [x2, #16]

	/* start with the buffered head block (if supplied) */
	cbz		x3, 0f
	ld1		{v8.4s-v11.4s}, [x3]
	b		1f

	/* load input */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w0, w0, #1

1:
	/* message words are big-endian: byte swap each word on LE kernels */
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/* prime t0 for the first (even) step and copy the digest to dg0 */
2:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/*
	 * 20 steps follow. The constant passed to each step is the one added
	 * to the schedule for the NEXT step, which is why every group of five
	 * switches to the next constant on its last line.
	 */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	cbnz		w0, 0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if we have no total byte count in x4. In that case, the input
	 * size was not a round multiple of the block size, and the padding is
	 * handled by the C code.
	 *
	 * The padding block is built directly in v8-v11 in host word order
	 * (0x80000000 in word 0, the 64-bit bit count in words 14-15), so the
	 * branch goes to 2: and skips the byte swap at 1:. x4 is cleared so
	 * that this path is taken only once.
	 */
	cbz		x4, 3f
	movi		v9.2d, #0
	mov		x8, #0x80000000
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8
	mov		x4, #0
	mov		v11.d[0], xzr
	mov		v11.d[1], x7
	b		2b

	/* store new state (element-wise, see the load above) */
3:	st1		{dgav.4s}, [x2]
	str		dgb, [x2, #16]
	ret
ENDPROC(sha1_ce_transform)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
/* | ||
* sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions | ||
* | ||
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License version 2 as | ||
* published by the Free Software Foundation. | ||
*/ | ||
|
||
#include <asm/neon.h> | ||
#include <asm/unaligned.h> | ||
#include <crypto/internal/hash.h> | ||
#include <crypto/sha.h> | ||
#include <linux/cpufeature.h> | ||
#include <linux/crypto.h> | ||
#include <linux/module.h> | ||
|
||
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); | ||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
MODULE_LICENSE("GPL v2"); | ||
|
||
/*
 * Core transform, implemented in assembly (sha1-ce-core.S).
 *
 * Processes @head first when it is non-NULL (one 64-byte block), then
 * @blocks 64-byte blocks read from @src, updating the five words at @state
 * in place. When @head is NULL, @blocks must be at least 1. A non-zero
 * @bytes is taken as the total message length and makes the routine hash
 * one extra block holding the padding and the bit count.
 */
asmlinkage void sha1_ce_transform(int blocks, const u8 *src, u32 *state,
				  u8 *head, long bytes);
|
||
static int sha1_init(struct shash_desc *desc) | ||
{ | ||
struct sha1_state *sctx = shash_desc_ctx(desc); | ||
|
||
*sctx = (struct sha1_state){ | ||
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
}; | ||
return 0; | ||
} | ||
|
||
static int sha1_update(struct shash_desc *desc, const u8 *data, | ||
unsigned int len) | ||
{ | ||
struct sha1_state *sctx = shash_desc_ctx(desc); | ||
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
|
||
sctx->count += len; | ||
|
||
if ((partial + len) >= SHA1_BLOCK_SIZE) { | ||
int blocks; | ||
|
||
if (partial) { | ||
int p = SHA1_BLOCK_SIZE - partial; | ||
|
||
memcpy(sctx->buffer + partial, data, p); | ||
data += p; | ||
len -= p; | ||
} | ||
|
||
blocks = len / SHA1_BLOCK_SIZE; | ||
len %= SHA1_BLOCK_SIZE; | ||
|
||
kernel_neon_begin_partial(16); | ||
sha1_ce_transform(blocks, data, sctx->state, | ||
partial ? sctx->buffer : NULL, 0); | ||
kernel_neon_end(); | ||
|
||
data += blocks * SHA1_BLOCK_SIZE; | ||
partial = 0; | ||
} | ||
if (len) | ||
memcpy(sctx->buffer + partial, data, len); | ||
return 0; | ||
} | ||
|
||
/*
 * Finish the hash: append the 0x80 marker, zero padding and the big-endian
 * 64-bit message length in bits, then write the five digest words to @out
 * in big-endian order (no alignment requirement on @out) and clear the
 * context. Always returns 0.
 */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };

	struct sha1_state *ctx = shash_desc_ctx(desc);
	/* captured before the padding is fed in, since that bumps count */
	__be64 msg_bits = cpu_to_be64(ctx->count << 3);
	u32 pad_bytes;
	unsigned int word;

	/* 1..64 bytes, chosen so that pad + length field ends a block */
	pad_bytes = SHA1_BLOCK_SIZE -
		    ((ctx->count + sizeof(msg_bits)) % SHA1_BLOCK_SIZE);

	sha1_update(desc, padding, pad_bytes);
	sha1_update(desc, (const u8 *)&msg_bits, sizeof(msg_bits));

	for (word = 0; word < SHA1_DIGEST_SIZE / sizeof(__be32); word++)
		put_unaligned_be32(ctx->state[word],
				   out + word * sizeof(__be32));

	memset(ctx, 0, sizeof(*ctx));
	return 0;
}
|
||
/*
 * Hash a last chunk of data and produce the digest in @out.
 *
 * If nothing has been hashed yet and @len is a non-zero multiple of the
 * block size, the whole digest (padding included) is computed by a single
 * call into the assembly routine, straight from @data. Every other case
 * goes through sha1_update() followed by sha1_final(). Always returns 0.
 */
static int sha1_finup(struct shash_desc *desc, const u8 *data,
		      unsigned int len, u8 *out)
{
	struct sha1_state *ctx = shash_desc_ctx(desc);
	unsigned int word;

	if (!(ctx->count == 0 && len != 0 && len % SHA1_BLOCK_SIZE == 0)) {
		sha1_update(desc, data, len);
		return sha1_final(desc, out);
	}

	/*
	 * Fast path: no data needs to be copied around. Passing the byte
	 * count as the last argument makes the assembly append the padding
	 * block itself.
	 */
	kernel_neon_begin_partial(16);
	sha1_ce_transform(len / SHA1_BLOCK_SIZE, data, ctx->state, NULL, len);
	kernel_neon_end();

	for (word = 0; word < SHA1_DIGEST_SIZE / sizeof(__be32); word++)
		put_unaligned_be32(ctx->state[word],
				   out + word * sizeof(__be32));

	memset(ctx, 0, sizeof(*ctx));
	return 0;
}
|
||
static int sha1_export(struct shash_desc *desc, void *out) | ||
{ | ||
struct sha1_state *sctx = shash_desc_ctx(desc); | ||
struct sha1_state *dst = out; | ||
|
||
*dst = *sctx; | ||
return 0; | ||
} | ||
|
||
static int sha1_import(struct shash_desc *desc, const void *in) | ||
{ | ||
struct sha1_state *sctx = shash_desc_ctx(desc); | ||
struct sha1_state const *src = in; | ||
|
||
*sctx = *src; | ||
return 0; | ||
} | ||
|
||
static struct shash_alg alg = { | ||
.init = sha1_init, | ||
.update = sha1_update, | ||
.final = sha1_final, | ||
.finup = sha1_finup, | ||
.export = sha1_export, | ||
.import = sha1_import, | ||
.descsize = sizeof(struct sha1_state), | ||
.digestsize = SHA1_DIGEST_SIZE, | ||
.statesize = sizeof(struct sha1_state), | ||
.base = { | ||
.cra_name = "sha1", | ||
.cra_driver_name = "sha1-ce", | ||
.cra_priority = 200, | ||
.cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
.cra_blocksize = SHA1_BLOCK_SIZE, | ||
.cra_module = THIS_MODULE, | ||
} | ||
}; | ||
|
||
static int __init sha1_ce_mod_init(void) | ||
{ | ||
return crypto_register_shash(&alg); | ||
} | ||
|
||
static void __exit sha1_ce_mod_fini(void) | ||
{ | ||
crypto_unregister_shash(&alg); | ||
} | ||
|
||
module_cpu_feature_match(SHA1, sha1_ce_mod_init); | ||
module_exit(sha1_ce_mod_fini); |