Skip to content

Commit

Permalink
arm64/crypto: SHA-1 using ARMv8 Crypto Extensions
Browse files Browse the repository at this point in the history
This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs that
have support for the SHA-1 part of the ARM v8 Crypto Extensions.

Change-Id: I29fafd308e17aff6e0d59938c106fae6ad7fe78e
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>

Conflicts:
	arch/arm64/Makefile
  • Loading branch information
jpa468 committed Aug 28, 2014
1 parent f56b1aa commit 42db8d1
Show file tree
Hide file tree
Showing 6 changed files with 359 additions and 0 deletions.
3 changes: 3 additions & 0 deletions arch/arm64/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -273,5 +273,8 @@ source "arch/arm64/Kconfig.debug"
source "security/Kconfig"

source "crypto/Kconfig"
if CRYPTO
source "arch/arm64/crypto/Kconfig"
endif

source "lib/Kconfig"
1 change: 1 addition & 0 deletions arch/arm64/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ TEXT_OFFSET := 0x00080000
export TEXT_OFFSET GZFLAGS

core-y += arch/arm64/kernel/ arch/arm64/mm/
core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC)

Expand Down
16 changes: 16 additions & 0 deletions arch/arm64/crypto/Kconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

menuconfig ARM64_CRYPTO
bool "ARM64 Accelerated Cryptographic Algorithms"
depends on ARM64
help
Say Y here to choose from a selection of cryptographic algorithms
implemented using ARM64 specific CPU features or instructions.

if ARM64_CRYPTO

config CRYPTO_SHA1_ARM64_CE
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_HASH

endif
12 changes: 12 additions & 0 deletions arch/arm64/crypto/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
# linux/arch/arm64/crypto/Makefile
#
# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#

obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
153 changes: 153 additions & 0 deletions arch/arm64/crypto/sha1-ce-core.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/

#include <linux/linkage.h>
#include <asm/assembler.h>

.text
.arch armv8-a+crypto

k0 .req v0
k1 .req v1
k2 .req v2
k3 .req v3

t0 .req v4
t1 .req v5

dga .req q6
dgav .req v6
dgb .req s7
dgbv .req v7

dg0q .req q12
dg0s .req s12
dg0v .req v12
dg1s .req s13
dg1v .req v13
dg2s .req s14

.macro add_only, op, ev, rc, s0, dg1
.ifc \ev, ev
add t1.4s, v\s0\().4s, \rc\().4s
sha1h dg2s, dg0s
.ifnb \dg1
sha1\op dg0q, \dg1, t0.4s
.else
sha1\op dg0q, dg1s, t0.4s
.endif
.else
.ifnb \s0
add t0.4s, v\s0\().4s, \rc\().4s
.endif
sha1h dg1s, dg0s
sha1\op dg0q, dg2s, t1.4s
.endif
.endm

.macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
add_only \op, \ev, \rc, \s1, \dg1
sha1su1 v\s0\().4s, v\s3\().4s
.endm

/*
* The SHA1 round constants
*/
.align 4
.Lsha1_rcon:
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6

/*
* void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
* u8 *head, long bytes)
*/
ENTRY(sha1_ce_transform)
/* load round constants */
adr x6, .Lsha1_rcon
ld1r {k0.4s}, [x6], #4
ld1r {k1.4s}, [x6], #4
ld1r {k2.4s}, [x6], #4
ld1r {k3.4s}, [x6]

/* load state */
ldr dga, [x2]
ldr dgb, [x2, #16]

/* load partial state (if supplied) */
cbz x3, 0f
ld1 {v8.4s-v11.4s}, [x3]
b 1f

/* load input */
0: ld1 {v8.4s-v11.4s}, [x1], #64
sub w0, w0, #1

1:
CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v9.16b, v9.16b )
CPU_LE( rev32 v10.16b, v10.16b )
CPU_LE( rev32 v11.16b, v11.16b )

2: add t0.4s, v8.4s, k0.4s
mov dg0v.16b, dgav.16b

add_update c, ev, k0, 8, 9, 10, 11, dgb
add_update c, od, k0, 9, 10, 11, 8
add_update c, ev, k0, 10, 11, 8, 9
add_update c, od, k0, 11, 8, 9, 10
add_update c, ev, k1, 8, 9, 10, 11

add_update p, od, k1, 9, 10, 11, 8
add_update p, ev, k1, 10, 11, 8, 9
add_update p, od, k1, 11, 8, 9, 10
add_update p, ev, k1, 8, 9, 10, 11
add_update p, od, k2, 9, 10, 11, 8

add_update m, ev, k2, 10, 11, 8, 9
add_update m, od, k2, 11, 8, 9, 10
add_update m, ev, k2, 8, 9, 10, 11
add_update m, od, k2, 9, 10, 11, 8
add_update m, ev, k3, 10, 11, 8, 9

add_update p, od, k3, 11, 8, 9, 10
add_only p, ev, k3, 9
add_only p, od, k3, 10
add_only p, ev, k3, 11
add_only p, od

/* update state */
add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s

cbnz w0, 0b

/*
* Final block: add padding and total bit count.
* Skip if we have no total byte count in x4. In that case, the input
* size was not a round multiple of the block size, and the padding is
* handled by the C code.
*/
cbz x4, 3f
movi v9.2d, #0
mov x8, #0x80000000
movi v10.2d, #0
ror x7, x4, #29 // ror(lsl(x4, 3), 32)
fmov d8, x8
mov x4, #0
mov v11.d[0], xzr
mov v11.d[1], x7
b 2b

/* store new state */
3: str dga, [x2]
str dgb, [x2, #16]
ret
ENDPROC(sha1_ce_transform)
174 changes: 174 additions & 0 deletions arch/arm64/crypto/sha1-ce-glue.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*
* sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/

#include <asm/neon.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
u8 *head, long bytes);

static int sha1_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);

*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}

static int sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;

sctx->count += len;

if ((partial + len) >= SHA1_BLOCK_SIZE) {
int blocks;

if (partial) {
int p = SHA1_BLOCK_SIZE - partial;

memcpy(sctx->buffer + partial, data, p);
data += p;
len -= p;
}

blocks = len / SHA1_BLOCK_SIZE;
len %= SHA1_BLOCK_SIZE;

kernel_neon_begin_partial(16);
sha1_ce_transform(blocks, data, sctx->state,
partial ? sctx->buffer : NULL, 0);
kernel_neon_end();

data += blocks * SHA1_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(sctx->buffer + partial, data, len);
return 0;
}

static int sha1_final(struct shash_desc *desc, u8 *out)
{
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };

struct sha1_state *sctx = shash_desc_ctx(desc);
__be64 bits = cpu_to_be64(sctx->count << 3);
__be32 *dst = (__be32 *)out;
int i;

u32 padlen = SHA1_BLOCK_SIZE
- ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);

sha1_update(desc, padding, padlen);
sha1_update(desc, (const u8 *)&bits, sizeof(bits));

for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);

*sctx = (struct sha1_state){};
return 0;
}

static int sha1_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int blocks;
int i;

if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
sha1_update(desc, data, len);
return sha1_final(desc, out);
}

/*
* Use a fast path if the input is a multiple of 64 bytes. In
* this case, there is no need to copy data around, and we can
* perform the entire digest calculation in a single invocation
* of sha1_ce_transform()
*/
blocks = len / SHA1_BLOCK_SIZE;

kernel_neon_begin_partial(16);
sha1_ce_transform(blocks, data, sctx->state, NULL, len);
kernel_neon_end();

for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);

*sctx = (struct sha1_state){};
return 0;
}

static int sha1_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
struct sha1_state *dst = out;

*dst = *sctx;
return 0;
}

static int sha1_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
struct sha1_state const *src = in;

*sctx = *src;
return 0;
}

static struct shash_alg alg = {
.init = sha1_init,
.update = sha1_update,
.final = sha1_final,
.finup = sha1_finup,
.export = sha1_export,
.import = sha1_import,
.descsize = sizeof(struct sha1_state),
.digestsize = SHA1_DIGEST_SIZE,
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};

static int __init sha1_ce_mod_init(void)
{
return crypto_register_shash(&alg);
}

static void __exit sha1_ce_mod_fini(void)
{
crypto_unregister_shash(&alg);
}

module_cpu_feature_match(SHA1, sha1_ce_mod_init);
module_exit(sha1_ce_mod_fini);

0 comments on commit 42db8d1

Please sign in to comment.